From f0287092a53c4a27f0e6fe3940768bd25a8835fc Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 11:57:47 -0600
Subject: [PATCH 001/151] Add naive implementation of stump_topk

---
 tests/naive.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/tests/naive.py b/tests/naive.py
index 4089e603e..8f3a05da8 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1716,3 +1716,74 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
         )
 
     return total_ndists
+
+
+def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1):
+    """
+    Traverse distance matrix along the diagonals and update the top-k
+    nearest neigbors matrix profile and matrix profile indices
+    """
+    if T_B is None:  # self-join:
+        ignore_trivial = True
+        distance_matrix = np.array(
+            [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)]
+        )
+        T_B = T_A.copy()
+    else:
+        ignore_trivial = False
+        distance_matrix = np.array(
+            [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)]
+        )
+
+    distance_matrix[np.isnan(distance_matrix)] = np.inf
+
+    n_A = T_A.shape[0]
+    n_B = T_B.shape[0]
+    l = n_A - m + 1
+    if exclusion_zone is None:
+        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    if ignore_trivial:
+        diags = np.arange(exclusion_zone + 1, n_A - m + 1)
+    else:
+        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
+
+    # the last two columns in P and I are to keep track of right and left mp for 1NN
+    P = np.full((l, k + 2), np.inf)
+    I = np.full((l, k + 2), -1, dtype=np.int64)
+
+    for g in diags:
+        if g >= 0:
+            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
+        else:
+            iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - g))
+
+        for i in iter_range:
+            D = distance_matrix[i, i + g]
+            if D < P[i, k - 1]:
+                idx = np.searchsorted(P[i, :k], D, side='right')
+                P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
+                I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
+
+            if ignore_trivial:  # Self-joins only
+                if D < P[i + g, k - 1]:
+                    idx = np.searchsorted(P[i + g, :k], D, side='right')
+                    P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
+                    I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
+
+                if i < i + g:
+                    # Left matrix profile and left matrix profile index
+                    if D < P[i + g, k]:
+                        P[i + g, k] = D
+                        I[i + g, k] = i
+
+                    if D < P[i, k + 1]:
+                        # right matrix profile and right matrix profile index
+                        P[i, k + 1] = D
+                        I[i, k + 1] = i + g
+
+    result = np.empty((l, 2 * k + 2), dtype=object)
+    result[:, :k] = P[:, :k]
+    result[:, k:] = I[:, :]
+
+    return result

From e893873fc763a944b3d7e414d23e116762ee6693 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 12:10:32 -0600
Subject: [PATCH 002/151] Copy test_stump code to test_stump_topk

---
 tests/test_stump_topk.py | 242 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 tests/test_stump_topk.py

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
new file mode 100644
index 000000000..d3475122f
--- /dev/null
+++ b/tests/test_stump_topk.py
@@ -0,0 +1,242 @@
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+from stumpy import stump, config
+import pytest
+import naive
+
+
+test_data = [
+    (
+        np.array([9, 8100, -60, 7], dtype=np.float64),
+        np.array([584, -11, 23, 79, 1001, 0, -19], dtype=np.float64),
+    ),
+    (
+        np.random.uniform(-1000, 1000, [8]).astype(np.float64),
+        np.random.uniform(-1000, 1000, [64]).astype(np.float64),
+    ),
+]
+
+substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])]
+substitution_values = [np.nan, np.inf]
+
+
+def test_stump_int_input():
+    with pytest.raises(TypeError):
+        stump(np.arange(10), 5)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_self_join(T_A, T_B):
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stump(T_B, m, exclusion_zone=zone)
+    comp_mp = stump(T_B, m, ignore_trivial=True)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_A_B_join(T_A, T_B):
+    m = 3
+    ref_mp = naive.stump(T_A, m, T_B=T_B)
+    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+def test_stump_constant_subsequence_self_join():
+    T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stump(T_A, m, exclusion_zone=zone)
+    comp_mp = stump(T_A, m, ignore_trivial=True)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+
+def test_stump_one_constant_subsequence_A_B_join():
+    T_A = np.random.rand(20)
+    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
+    m = 3
+    ref_mp = naive.stamp(T_A, m, T_B=T_B)
+    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    # Swap inputs
+    ref_mp = naive.stamp(T_B, m, T_B=T_A)
+    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+
+def test_stump_two_constant_subsequences_A_B_join():
+    T_A = np.concatenate(
+        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
+    )
+    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
+    m = 3
+    ref_mp = naive.stamp(T_A, m, T_B=T_B)
+    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    # Swap inputs
+    ref_mp = naive.stamp(T_B, m, T_B=T_A)
+    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+    comp_mp = stump(pd.Series(T_B), m, pd.Series(T_A), ignore_trivial=False)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
+
+
+def test_stump_identical_subsequence_self_join():
+    identical = np.random.rand(8)
+    T_A = np.random.rand(20)
+    T_A[1 : 1 + identical.shape[0]] = identical
+    T_A[11 : 11 + identical.shape[0]] = identical
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stamp(T_A, m, exclusion_zone=zone)
+    comp_mp = stump(T_A, m, ignore_trivial=True)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(
+        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
+    )  # ignore indices
+
+    comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(
+        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
+    )  # ignore indices
+
+
+def test_stump_identical_subsequence_A_B_join():
+    identical = np.random.rand(8)
+    T_A = np.random.rand(20)
+    T_B = np.random.rand(20)
+    T_A[1 : 1 + identical.shape[0]] = identical
+    T_B[11 : 11 + identical.shape[0]] = identical
+    m = 3
+    ref_mp = naive.stamp(T_A, m, T_B=T_B)
+    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(
+        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
+    )  # ignore indices
+
+    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(
+        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
+    )  # ignore indices
+
+    # Swap inputs
+    ref_mp = naive.stamp(T_B, m, T_B=T_A)
+    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(
+        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
+    )  # ignore indices
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+@pytest.mark.parametrize("substitute_B", substitution_values)
+@pytest.mark.parametrize("substitution_locations", substitution_locations)
+def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
+    m = 3
+
+    T_B_sub = T_B.copy()
+
+    for substitution_location_B in substitution_locations:
+        T_B_sub[:] = T_B[:]
+        T_B_sub[substitution_location_B] = substitute_B
+
+        zone = int(np.ceil(m / 4))
+        ref_mp = naive.stamp(T_B_sub, m, exclusion_zone=zone)
+        comp_mp = stump(T_B_sub, m, ignore_trivial=True)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+        comp_mp = stump(pd.Series(T_B_sub), m, ignore_trivial=True)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+@pytest.mark.parametrize("substitute_A", substitution_values)
+@pytest.mark.parametrize("substitute_B", substitution_values)
+@pytest.mark.parametrize("substitution_locations", substitution_locations)
+def test_stump_nan_inf_A_B_join(
+    T_A, T_B, substitute_A, substitute_B, substitution_locations
+):
+    m = 3
+
+    T_A_sub = T_A.copy()
+    T_B_sub = T_B.copy()
+
+    for substitution_location_B in substitution_locations:
+        for substitution_location_A in substitution_locations:
+            T_A_sub[:] = T_A[:]
+            T_B_sub[:] = T_B[:]
+            T_A_sub[substitution_location_A] = substitute_A
+            T_B_sub[substitution_location_B] = substitute_B
+
+            ref_mp = naive.stamp(T_A_sub, m, T_B=T_B_sub)
+            comp_mp = stump(T_A_sub, m, T_B_sub, ignore_trivial=False)
+            naive.replace_inf(ref_mp)
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)
+
+            comp_mp = stump(
+                pd.Series(T_A_sub), m, pd.Series(T_B_sub), ignore_trivial=False
+            )
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+def test_stump_nan_zero_mean_self_join():
+    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
+    m = 3
+
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stamp(T, m, exclusion_zone=zone)
+    comp_mp = stump(T, m, ignore_trivial=True)
+
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)

From 986311f78dae7ca90db29a793d43fa23b0a3afe4 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 12:15:46 -0600
Subject: [PATCH 003/151] Replace naive.stump with naive.stump_topk in tests

---
 tests/test_stump_topk.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
index d3475122f..290487460 100644
--- a/tests/test_stump_topk.py
+++ b/tests/test_stump_topk.py
@@ -28,9 +28,10 @@ def test_stump_int_input():
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join(T_A, T_B):
+    k = 3
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump(T_B, m, exclusion_zone=zone)
+    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k)
     comp_mp = stump(T_B, m, ignore_trivial=True)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
@@ -43,8 +44,9 @@ def test_stump_self_join(T_A, T_B):
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_A_B_join(T_A, T_B):
+    k = 3
     m = 3
-    ref_mp = naive.stump(T_A, m, T_B=T_B)
+    ref_mp = naive.stump_topk(T_A, m, T_B=T_B, k=k)
     comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
@@ -57,9 +59,10 @@ def test_stump_A_B_join(T_A, T_B):
 
 def test_stump_constant_subsequence_self_join():
     T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
+    k = 3
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump(T_A, m, exclusion_zone=zone)
+    ref_mp = naive.stump_topk(T_A, m, exclusion_zone=zone, k=k)
     comp_mp = stump(T_A, m, ignore_trivial=True)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)

From 9d8aafc3b75a051dee64aa72112dc8a3050b13b9 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 13:04:36 -0600
Subject: [PATCH 004/151] Add self-join tests for 1NN and KNN and remove the other copied tests

---
 tests/test_stump_topk.py | 202 ++-------------------------------------
 1 file changed, 7 insertions(+), 195 deletions(-)

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
index 290487460..b3276b85b 100644
--- a/tests/test_stump_topk.py
+++ b/tests/test_stump_topk.py
@@ -27,8 +27,8 @@ def test_stump_int_input():
 
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
-def test_stump_self_join(T_A, T_B):
-    k = 3
+def test_stump_self_join_1NN(T_A, T_B):
+    k = 1
     m = 3
     zone = int(np.ceil(m / 4))
     ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k)
@@ -42,204 +42,16 @@ def test_stump_self_join(T_A, T_B):
     npt.assert_almost_equal(ref_mp, comp_mp)
 
 
-@pytest.mark.parametrize("T_A, T_B", test_data)
-def test_stump_A_B_join(T_A, T_B):
-    k = 3
-    m = 3
-    ref_mp = naive.stump_topk(T_A, m, T_B=T_B, k=k)
-    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-
-def test_stump_constant_subsequence_self_join():
-    T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
+def test_stump_self_join_KNN(T_A, T_B):
     k = 3
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump_topk(T_A, m, exclusion_zone=zone, k=k)
-    comp_mp = stump(T_A, m, ignore_trivial=True)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-
-def test_stump_one_constant_subsequence_A_B_join():
-    T_A = np.random.rand(20)
-    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
-    m = 3
-    ref_mp = naive.stamp(T_A, m, T_B=T_B)
-    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    # Swap inputs
-    ref_mp = naive.stamp(T_B, m, T_B=T_A)
-    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-
-def test_stump_two_constant_subsequences_A_B_join():
-    T_A = np.concatenate(
-        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
-    )
-    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
-    m = 3
-    ref_mp = naive.stamp(T_A, m, T_B=T_B)
-    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    # Swap inputs
-    ref_mp = naive.stamp(T_B, m, T_B=T_A)
-    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-    comp_mp = stump(pd.Series(T_B), m, pd.Series(T_A), ignore_trivial=False)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
-
-
-def test_stump_identical_subsequence_self_join():
-    identical = np.random.rand(8)
-    T_A = np.random.rand(20)
-    T_A[1 : 1 + identical.shape[0]] = identical
-    T_A[11 : 11 + identical.shape[0]] = identical
-    m = 3
-    zone = int(np.ceil(m / 4))
-    ref_mp = naive.stamp(T_A, m, exclusion_zone=zone)
-    comp_mp = stump(T_A, m, ignore_trivial=True)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(
-        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
-    )  # ignore indices
-
-    comp_mp = stump(pd.Series(T_A), m, ignore_trivial=True)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(
-        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
-    )  # ignore indices
-
-
-def test_stump_identical_subsequence_A_B_join():
-    identical = np.random.rand(8)
-    T_A = np.random.rand(20)
-    T_B = np.random.rand(20)
-    T_A[1 : 1 + identical.shape[0]] = identical
-    T_B[11 : 11 + identical.shape[0]] = identical
-    m = 3
-    ref_mp = naive.stamp(T_A, m, T_B=T_B)
-    comp_mp = stump(T_A, m, T_B, ignore_trivial=False)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(
-        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
-    )  # ignore indices
-
-    comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(
-        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
-    )  # ignore indices
-
-    # Swap inputs
-    ref_mp = naive.stamp(T_B, m, T_B=T_A)
-    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
+    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k)
+    comp_mp = stump(T_B, m, ignore_trivial=True)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(
-        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
-    )  # ignore indices
-
-
-@pytest.mark.parametrize("T_A, T_B", test_data)
-@pytest.mark.parametrize("substitute_B", substitution_values)
-@pytest.mark.parametrize("substitution_locations", substitution_locations)
-def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
-    m = 3
-
-    T_B_sub = T_B.copy()
-
-    for substitution_location_B in substitution_locations:
-        T_B_sub[:] = T_B[:]
-        T_B_sub[substitution_location_B] = substitute_B
-
-        zone = int(np.ceil(m / 4))
-        ref_mp = naive.stamp(T_B_sub, m, exclusion_zone=zone)
-        comp_mp = stump(T_B_sub, m, ignore_trivial=True)
-        naive.replace_inf(ref_mp)
-        naive.replace_inf(comp_mp)
-        npt.assert_almost_equal(ref_mp, comp_mp)
-
-        comp_mp = stump(pd.Series(T_B_sub), m, ignore_trivial=True)
-        naive.replace_inf(comp_mp)
-        npt.assert_almost_equal(ref_mp, comp_mp)
-
-
-@pytest.mark.parametrize("T_A, T_B", test_data)
-@pytest.mark.parametrize("substitute_A", substitution_values)
-@pytest.mark.parametrize("substitute_B", substitution_values)
-@pytest.mark.parametrize("substitution_locations", substitution_locations)
-def test_stump_nan_inf_A_B_join(
-    T_A, T_B, substitute_A, substitute_B, substitution_locations
-):
-    m = 3
-
-    T_A_sub = T_A.copy()
-    T_B_sub = T_B.copy()
-
-    for substitution_location_B in substitution_locations:
-        for substitution_location_A in substitution_locations:
-            T_A_sub[:] = T_A[:]
-            T_B_sub[:] = T_B[:]
-            T_A_sub[substitution_location_A] = substitute_A
-            T_B_sub[substitution_location_B] = substitute_B
-
-            ref_mp = naive.stamp(T_A_sub, m, T_B=T_B_sub)
-            comp_mp = stump(T_A_sub, m, T_B_sub, ignore_trivial=False)
-            naive.replace_inf(ref_mp)
-            naive.replace_inf(comp_mp)
-            npt.assert_almost_equal(ref_mp, comp_mp)
-
-            comp_mp = stump(
-                pd.Series(T_A_sub), m, pd.Series(T_B_sub), ignore_trivial=False
-            )
-            naive.replace_inf(comp_mp)
-            npt.assert_almost_equal(ref_mp, comp_mp)
-
-
-def test_stump_nan_zero_mean_self_join():
-    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
-    m = 3
-
-    zone = int(np.ceil(m / 4))
-    ref_mp = naive.stamp(T, m, exclusion_zone=zone)
-    comp_mp = stump(T, m, ignore_trivial=True)
+    npt.assert_almost_equal(ref_mp, comp_mp)
 
-    naive.replace_inf(ref_mp)
+    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)

From 121686b43187f053f23c09f07f2cf88f0ab1c238 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 13:09:15 -0600
Subject: [PATCH 005/151] remove variable k in 1NN test

---
 tests/test_stump_topk.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
index b3276b85b..3f277a0ad 100644
--- a/tests/test_stump_topk.py
+++ b/tests/test_stump_topk.py
@@ -28,10 +28,9 @@ def test_stump_int_input():
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join_1NN(T_A, T_B):
-    k = 1
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k)
+    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=1)
     comp_mp = stump(T_B, m, ignore_trivial=True)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)

From 730bfbbee7e867b2373e5060503492bab533efd8 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 13:11:49 -0600
Subject: [PATCH 006/151] Add missing parametrize decorator to test_stump_self_join_KNN

---
 tests/test_stump_topk.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
index 3f277a0ad..4b722fd8f 100644
--- a/tests/test_stump_topk.py
+++ b/tests/test_stump_topk.py
@@ -41,6 +41,7 @@ def test_stump_self_join_1NN(T_A, T_B):
     npt.assert_almost_equal(ref_mp, comp_mp)
 
 
+@pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join_KNN(T_A, T_B):
     k = 3
     m = 3

From f78348f3fadaface820e558c909e19cb0803503c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 13:19:43 -0600
Subject: [PATCH 007/151] Fix iter_range start for negative diagonals in naive.stump_topk

---
 tests/naive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 8f3a05da8..6dd4bcb99 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1756,11 +1756,11 @@ def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1):
         if g >= 0:
             iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
         else:
-            iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - g))
+            iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
 
         for i in iter_range:
             D = distance_matrix[i, i + g]
-            if D < P[i, k - 1]:
+            if D < P[i, k - 1]: #less than k-th smallest value of T[i:i+m]
                 idx = np.searchsorted(P[i, :k], D, side='right')
                 P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                 I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]

From e09b5f05d16c4506ded15df432fcd27b2fc822df Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 28 Apr 2022 13:31:18 -0600
Subject: [PATCH 008/151] Fix formatting in naive.stump_topk (comment spacing, double quotes)

---
 tests/naive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 6dd4bcb99..91a88cea7 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1760,14 +1760,14 @@ def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1):
 
         for i in iter_range:
             D = distance_matrix[i, i + g]
-            if D < P[i, k - 1]: #less than k-th smallest value of T[i:i+m]
-                idx = np.searchsorted(P[i, :k], D, side='right')
+            if D < P[i, k - 1]:  # less than k-th smallest value of T[i:i+m]
+                idx = np.searchsorted(P[i, :k], D, side="right")
                 P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                 I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
             if ignore_trivial:  # Self-joins only
                 if D < P[i + g, k - 1]:
-                    idx = np.searchsorted(P[i + g, :k], D, side='right')
+                    idx = np.searchsorted(P[i + g, :k], D, side="right")
                     P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                     I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
 

From 95a8c081f745ea8781da5b4eaefceea936559471 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 01:15:01 -0600
Subject: [PATCH 009/151] Erase function stump_topk

---
 tests/naive.py | 71 --------------------------------------------------
 1 file changed, 71 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 91a88cea7..4089e603e 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1716,74 +1716,3 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
         )
 
     return total_ndists
-
-
-def stump_topk(T_A, m, T_B=None, exclusion_zone=None, k=1):
-    """
-    Traverse distance matrix along the diagonals and update the top-k
-    nearest neigbors matrix profile and matrix profile indices
-    """
-    if T_B is None:  # self-join:
-        ignore_trivial = True
-        distance_matrix = np.array(
-            [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)]
-        )
-        T_B = T_A.copy()
-    else:
-        ignore_trivial = False
-        distance_matrix = np.array(
-            [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)]
-        )
-
-    distance_matrix[np.isnan(distance_matrix)] = np.inf
-
-    n_A = T_A.shape[0]
-    n_B = T_B.shape[0]
-    l = n_A - m + 1
-    if exclusion_zone is None:
-        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
-
-    if ignore_trivial:
-        diags = np.arange(exclusion_zone + 1, n_A - m + 1)
-    else:
-        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
-
-    # the last two columns in P and I are to keep track of right and left mp for 1NN
-    P = np.full((l, k + 2), np.inf)
-    I = np.full((l, k + 2), -1, dtype=np.int64)
-
-    for g in diags:
-        if g >= 0:
-            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
-        else:
-            iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
-
-        for i in iter_range:
-            D = distance_matrix[i, i + g]
-            if D < P[i, k - 1]:  # less than k-th smallest value of T[i:i+m]
-                idx = np.searchsorted(P[i, :k], D, side="right")
-                P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
-                I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
-
-            if ignore_trivial:  # Self-joins only
-                if D < P[i + g, k - 1]:
-                    idx = np.searchsorted(P[i + g, :k], D, side="right")
-                    P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
-                    I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
-
-                if i < i + g:
-                    # Left matrix profile and left matrix profile index
-                    if D < P[i + g, k]:
-                        P[i + g, k] = D
-                        I[i + g, k] = i
-
-                    if D < P[i, k + 1]:
-                        # right matrix profile and right matrix profile index
-                        P[i, k + 1] = D
-                        I[i, k + 1] = i + g
-
-    result = np.empty((l, 2 * k + 2), dtype=object)
-    result[:, :k] = P[:, :k]
-    result[:, k:] = I[:, :]
-
-    return result

From d0701fedd3060dcb0b97a266ceaae4beacae52e8 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 16:18:58 -0600
Subject: [PATCH 010/151] Revise naive.stump to return topk NN matrix profile

---
 tests/naive.py | 64 +++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 4089e603e..0c49c5746 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -156,7 +156,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):
     return result
 
 
-def stump(T_A, m, T_B=None, exclusion_zone=None):
+def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     """
     Traverse distance matrix along the diagonals and update the matrix profile and
     matrix profile indices
@@ -181,45 +181,35 @@ def stump(T_A, m, T_B=None, exclusion_zone=None):
     if exclusion_zone is None:
         exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
 
+    is_included = np.ones_like(distance_matrix, dtype=bool)
     if ignore_trivial:
-        diags = np.arange(exclusion_zone + 1, n_A - m + 1)
-    else:
-        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
+        for i in range(l):
+            apply_exclusion_zone(is_included[i], i, exclusion_zone, False)
 
-    P = np.full((l, 3), np.inf)
-    I = np.full((l, 3), -1, dtype=np.int64)
+    P = np.full((l, k), np.inf)
+    I = np.full((l, k + 2), -1, dtype=np.int64)
 
-    for k in diags:
-        if k >= 0:
-            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k))
-        else:
-            iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k))
-
-        for i in iter_range:
-            D = distance_matrix[i, i + k]
-            if D < P[i, 0]:
-                P[i, 0] = D
-                I[i, 0] = i + k
-
-            if ignore_trivial:  # Self-joins only
-                if D < P[i + k, 0]:
-                    P[i + k, 0] = D
-                    I[i + k, 0] = i
-
-                if i < i + k:
-                    # Left matrix profile and left matrix profile index
-                    if D < P[i + k, 1]:
-                        P[i + k, 1] = D
-                        I[i + k, 1] = i
-
-                    if D < P[i, 2]:
-                        # right matrix profile and right matrix profile index
-                        P[i, 2] = D
-                        I[i, 2] = i + k
-
-    result = np.empty((l, 4), dtype=object)
-    result[:, 0] = P[:, 0]
-    result[:, 1:4] = I[:, :]
+    for i in range(l):
+        mask = is_included[i]
+        IDX = np.argsort(distance_matrix[i][mask])
+        nn_indices_sorted = np.flatnonzero(mask)[IDX]
+
+        topk_indices = nn_indices_sorted[:k]
+        P[i, :k] = distance_matrix[i][topk_indices]
+        I[i, :k] = topk_indices
+
+        if ignore_trivial:
+            left_indices = nn_indices_sorted[nn_indices_sorted < i]
+            if len(left_indices) > 0:
+                I[i, k] = left_indices[0]
+
+            right_indices = nn_indices_sorted[nn_indices_sorted > i]
+            if len(right_indices) > 0:
+                I[i, k + 1] = right_indices[0]
+
+    result = np.empty((l, 2 * k + 2), dtype=object)
+    result[:, :k] = P[:, :]
+    result[:, k:] = I[:, :]
 
     return result
 

From 54445994ac87bccecf2a4252044d7e5cd0434718 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 16:29:53 -0600
Subject: [PATCH 011/151] Added a few comments

---
 tests/naive.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 0c49c5746..f9c9226ef 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -185,9 +185,13 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     if ignore_trivial:
         for i in range(l):
             apply_exclusion_zone(is_included[i], i, exclusion_zone, False)
+            # replacing values of distanc matrix to np.inf in excluion zone
+            # can cause problem later if there is nan/np.inf in data. So,
+            # it is better to use mask.
 
     P = np.full((l, k), np.inf)
-    I = np.full((l, k + 2), -1, dtype=np.int64)
+    I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are
+    # to store left and right matrix profile indices.
 
     for i in range(l):
         mask = is_included[i]

From 9ebb08a4f274cd7c4e1f5a5f11c5c92cb5839721 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 17:17:06 -0600
Subject: [PATCH 012/151] Add one new test case for topk matrix profile

---
 tests/test_stump.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index d3475122f..67a6ec704 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -240,3 +240,19 @@ def test_stump_nan_zero_mean_self_join():
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_self_join_KNN(T_A, T_B):
+    k = 2
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+    comp_mp = stump(T_B, m, ignore_trivial=True)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)

From d83e8e6355813c15dbfc111a1e853ce1879c3027 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 17:20:17 -0600
Subject: [PATCH 013/151] Removed unnecessary test file

---
 tests/test_stump_topk.py | 57 ----------------------------------------
 1 file changed, 57 deletions(-)
 delete mode 100644 tests/test_stump_topk.py

diff --git a/tests/test_stump_topk.py b/tests/test_stump_topk.py
deleted file mode 100644
index 4b722fd8f..000000000
--- a/tests/test_stump_topk.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import numpy as np
-import numpy.testing as npt
-import pandas as pd
-from stumpy import stump, config
-import pytest
-import naive
-
-
-test_data = [
-    (
-        np.array([9, 8100, -60, 7], dtype=np.float64),
-        np.array([584, -11, 23, 79, 1001, 0, -19], dtype=np.float64),
-    ),
-    (
-        np.random.uniform(-1000, 1000, [8]).astype(np.float64),
-        np.random.uniform(-1000, 1000, [64]).astype(np.float64),
-    ),
-]
-
-substitution_locations = [(slice(0, 0), 0, -1, slice(1, 3), [0, 3])]
-substitution_values = [np.nan, np.inf]
-
-
-def test_stump_int_input():
-    with pytest.raises(TypeError):
-        stump(np.arange(10), 5)
-
-
-@pytest.mark.parametrize("T_A, T_B", test_data)
-def test_stump_self_join_1NN(T_A, T_B):
-    m = 3
-    zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=1)
-    comp_mp = stump(T_B, m, ignore_trivial=True)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-
-@pytest.mark.parametrize("T_A, T_B", test_data)
-def test_stump_self_join_KNN(T_A, T_B):
-    k = 3
-    m = 3
-    zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump_topk(T_B, m, exclusion_zone=zone, k=k)
-    comp_mp = stump(T_B, m, ignore_trivial=True)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)

From 9c8f019353991898bd8ad248053353af19e7c288 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 20:58:31 -0600
Subject: [PATCH 014/151] Set I to -1 if its corresponding P is not finite

---
 tests/naive.py | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index f9c9226ef..d3640b66c 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -158,8 +158,8 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):
 
 def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     """
-    Traverse distance matrix along the diagonals and update the matrix profile and
-    matrix profile indices
+    Traverse distance matrix in a row-wise manner and store topk nearest neighbor
+    matrix profile and matrix profile indices
     """
     if T_B is None:  # self-join:
         ignore_trivial = True
@@ -181,35 +181,36 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     if exclusion_zone is None:
         exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
 
-    is_included = np.ones_like(distance_matrix, dtype=bool)
     if ignore_trivial:
         for i in range(l):
-            apply_exclusion_zone(is_included[i], i, exclusion_zone, False)
-            # replacing values of distanc matrix to np.inf in excluion zone
-            # can cause problem later if there is nan/np.inf in data. So,
-            # it is better to use mask.
+            apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf)
 
     P = np.full((l, k), np.inf)
-    I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns in I are
+    I = np.full((l, k + 2), -1, dtype=np.int64)  # two more columns in I are
     # to store left and right matrix profile indices.
 
     for i in range(l):
-        mask = is_included[i]
-        IDX = np.argsort(distance_matrix[i][mask])
-        nn_indices_sorted = np.flatnonzero(mask)[IDX]
-
-        topk_indices = nn_indices_sorted[:k]
+        indices = np.argsort(distance_matrix[i])
+        topk_indices = indices[:k]
         P[i, :k] = distance_matrix[i][topk_indices]
-        I[i, :k] = topk_indices
+        I[i, :k] = np.where(distance_matrix[i][topk_indices] != np.inf, topk_indices, -1)
 
         if ignore_trivial:
-            left_indices = nn_indices_sorted[nn_indices_sorted < i]
+            IL = -1
+            left_indices = indices[indices < i]
             if len(left_indices) > 0:
-                I[i, k] = left_indices[0]
+                IL = left_indices[0]
+            if distance_matrix[i][IL] == np.inf:
+                IL = -1
+            I[i, k] = IL
 
-            right_indices = nn_indices_sorted[nn_indices_sorted > i]
+            IR = -1
+            right_indices = indices[indices > i]
             if len(right_indices) > 0:
-                I[i, k + 1] = right_indices[0]
+                IR = right_indices[0]
+            if distance_matrix[i][IR] == np.inf:
+                IR = -1
+            I[i, k + 1] = IR
 
     result = np.empty((l, 2 * k + 2), dtype=object)
     result[:, :k] = P[:, :]

From 0ce959549502e8091d1d017da8c95df73ae45401 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 21:04:16 -0600
Subject: [PATCH 015/151] Removed new test function

---
 tests/test_stump.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index 67a6ec704..4d2bf312b 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -242,17 +242,17 @@ def test_stump_nan_zero_mean_self_join():
     npt.assert_almost_equal(ref_mp, comp_mp)
 
 
-@pytest.mark.parametrize("T_A, T_B", test_data)
-def test_stump_self_join_KNN(T_A, T_B):
-    k = 2
-    m = 3
-    zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
-    comp_mp = stump(T_B, m, ignore_trivial=True)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
-
-    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
+#@pytest.mark.parametrize("T_A, T_B", test_data)
+#def test_stump_self_join_KNN(T_A, T_B):
+#    k = 2
+#    m = 3
+#    zone = int(np.ceil(m / 4))
+#    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+#    comp_mp = stump(T_B, m, ignore_trivial=True)
+#    naive.replace_inf(ref_mp)
+#    naive.replace_inf(comp_mp)
+#    npt.assert_almost_equal(ref_mp, comp_mp)
+
+#    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
+#    naive.replace_inf(comp_mp)
+#    npt.assert_almost_equal(ref_mp, comp_mp)

From a9726984574deca4eb79c74b622581036604635c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 21:06:59 -0600
Subject: [PATCH 016/151] Fixed format

---
 tests/naive.py      |  4 +++-
 tests/test_stump.py | 16 ----------------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index d3640b66c..98f639a08 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -193,7 +193,9 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
         indices = np.argsort(distance_matrix[i])
         topk_indices = indices[:k]
         P[i, :k] = distance_matrix[i][topk_indices]
-        I[i, :k] = np.where(distance_matrix[i][topk_indices] != np.inf, topk_indices, -1)
+        I[i, :k] = np.where(
+            distance_matrix[i][topk_indices] != np.inf, topk_indices, -1
+        )
 
         if ignore_trivial:
             IL = -1
diff --git a/tests/test_stump.py b/tests/test_stump.py
index 4d2bf312b..d3475122f 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -240,19 +240,3 @@ def test_stump_nan_zero_mean_self_join():
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
-
-
-#@pytest.mark.parametrize("T_A, T_B", test_data)
-#def test_stump_self_join_KNN(T_A, T_B):
-#    k = 2
-#    m = 3
-#    zone = int(np.ceil(m / 4))
-#    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
-#    comp_mp = stump(T_B, m, ignore_trivial=True)
-#    naive.replace_inf(ref_mp)
-#    naive.replace_inf(comp_mp)
-#    npt.assert_almost_equal(ref_mp, comp_mp)
-
-#    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
-#    naive.replace_inf(comp_mp)
-#    npt.assert_almost_equal(ref_mp, comp_mp)

From e2d3061e132316cad0e4bbb74d0ff8f5bf0e52ce Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 21:14:07 -0600
Subject: [PATCH 017/151] minor change

---
 tests/naive.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 98f639a08..429b2ac99 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -161,7 +161,10 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     Traverse distance matrix in a row-wise manner and store topk nearest neighbor
     matrix profile and matrix profile indices
     """
-    if T_B is None:  # self-join:
+    if exclusion_zone is None:
+        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    if T_B is None: # self-join:
         ignore_trivial = True
         distance_matrix = np.array(
             [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)]
@@ -175,12 +178,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
 
     distance_matrix[np.isnan(distance_matrix)] = np.inf
 
-    n_A = T_A.shape[0]
-    n_B = T_B.shape[0]
-    l = n_A - m + 1
-    if exclusion_zone is None:
-        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
-
+    l = T_A.shape[0] - m + 1
     if ignore_trivial:
         for i in range(l):
             apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf)

From 1938f63363dc873a7c00300c66c54742ec9b0010 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 21:16:46 -0600
Subject: [PATCH 018/151] minor change

---
 tests/naive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 429b2ac99..ff50eecf7 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -164,7 +164,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     if exclusion_zone is None:
         exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
 
-    if T_B is None: # self-join:
+    if T_B is None:  # self-join:
         ignore_trivial = True
         distance_matrix = np.array(
             [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)]

From 0e25a347ad7a3fa50d63144e32df771d9ad57545 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 22:32:24 -0600
Subject: [PATCH 019/151] Add new test function for topk matrix profile

---
 tests/test_stump.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index d3475122f..ea4bae3c9 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -240,3 +240,18 @@ def test_stump_nan_zero_mean_self_join():
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_self_join_KNN(T_A, T_B):
+    k = 2
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+    comp_mp = stump(T_B, m, ignore_trivial=True)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)

From e3935851485cc4ecd9c097c915ab37c3946530fd Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 29 Apr 2022 22:34:13 -0600
Subject: [PATCH 020/151] Fixed format

---
 tests/test_stump.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index ea4bae3c9..67a6ec704 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -241,6 +241,7 @@ def test_stump_nan_zero_mean_self_join():
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
 
+
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join_KNN(T_A, T_B):
     k = 2

From 850a5946c88465a4fa93fd91b113015752860ff2 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 12:53:45 -0600
Subject: [PATCH 021/151] Use diagonal traversal to get top-k matrix profile -
 change naive.stump from row-wise to diagonal traversal - add a note to
 docstring to inform reader of row-wise traversal - use
 numpy.searchsorted(side='right')

---
 tests/naive.py | 83 ++++++++++++++++++++++++++++----------------------
 1 file changed, 47 insertions(+), 36 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 554c6f9fd..552c85cee 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -158,12 +158,11 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):
 
 def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     """
-    Traverse distance matrix in a row-wise manner and store topk nearest neighbor
-    matrix profile and matrix profile indices
-    """
-    if exclusion_zone is None:
-        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+    Traverse distance matrix along the diagonals and update the top-k nearest
+    neighbor  matrix profile and matrix profile indices
 
+    NOTE: For row-wise traversal, please use function `stamp`
+    """
     if T_B is None:  # self-join:
         ignore_trivial = True
         distance_matrix = np.array(
@@ -178,42 +177,54 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
 
     distance_matrix[np.isnan(distance_matrix)] = np.inf
 
-    l = T_A.shape[0] - m + 1
+    n_A = T_A.shape[0]
+    n_B = T_B.shape[0]
+    l = n_A - m + 1
+    if exclusion_zone is None:
+        exclusion_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
     if ignore_trivial:
-        for i in range(l):
-            apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf)
+        diags = np.arange(exclusion_zone + 1, n_A - m + 1)
+    else:
+        diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
 
-    P = np.full((l, k), np.inf)
-    I = np.full((l, k + 2), -1, dtype=np.int64)  # two more columns in I are
-    # to store left and right matrix profile indices.
+    P = np.full((l, k + 2), np.inf)
+    I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store
+    # ... left and right top-1 matrix profile indices.
 
-    for i in range(l):
-        indices = np.argsort(distance_matrix[i])
-        topk_indices = indices[:k]
-        P[i, :k] = distance_matrix[i][topk_indices]
-        I[i, :k] = np.where(
-            distance_matrix[i][topk_indices] != np.inf, topk_indices, -1
-        )
+    for g in diags:
+        if g >= 0:
+            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
+        else:
+            iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
 
-        if ignore_trivial:
-            IL = -1
-            left_indices = indices[indices < i]
-            if len(left_indices) > 0:
-                IL = left_indices[0]
-            if distance_matrix[i][IL] == np.inf:
-                IL = -1
-            I[i, k] = IL
+        for i in iter_range:
+            D = distance_matrix[i, i + g]
+            if D < P[i, k-1]:
+                idx = np.searchsorted(P[i, :k], D, side='right')
+                # to keep the top-k, we need to the get rid of the last element.
+                P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
+                I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
-            IR = -1
-            right_indices = indices[indices > i]
-            if len(right_indices) > 0:
-                IR = right_indices[0]
-            if distance_matrix[i][IR] == np.inf:
-                IR = -1
-            I[i, k + 1] = IR
-
-    result = np.empty((l, 2 * k + 2), dtype=object)
-    result[:, :k] = P[:, :]
+            if ignore_trivial:  # Self-joins only
+                if D < P[i + g, k-1]:
+                    idx = np.searchsorted(P[i + g, :k], D, side='right')
+                    P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
+                    I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
+
+                if i < i + g:
+                    # Left matrix profile and left matrix profile index
+                    if D < P[i + g, k]:
+                        P[i + g, k] = D
+                        I[i + g, k] = i
+
+                    if D < P[i, k + 1]:
+                        # right matrix profile and right matrix profile index
+                        P[i, k + 1] = D
+                        I[i, k + 1] = i + g
+
+    result = np.empty((2 * k + 2, 4), dtype=object)
+    result[:, :k] = P[:, :k]
     result[:, k:] = I[:, :]
 
     return result

From 278e76ca5e74c53276b1e20cc6d4ab3efd8bc078 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 13:00:21 -0600
Subject: [PATCH 022/151] Fixed shape of naive.stump output

---
 tests/naive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 552c85cee..871d52024 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -223,7 +223,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
                         P[i, k + 1] = D
                         I[i, k + 1] = i + g
 
-    result = np.empty((2 * k + 2, 4), dtype=object)
+    result = np.empty((l, 2 * k + 2), dtype=object)
     result[:, :k] = P[:, :k]
     result[:, k:] = I[:, :]
 

From a864662b41f8553df6fcc1f1b9b3b341beb5cc31 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 13:29:52 -0600
Subject: [PATCH 023/151] Add naive version of numpy.searchsorted

---
 tests/naive.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/naive.py b/tests/naive.py
index 871d52024..010836639 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -156,6 +156,14 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):
     return result
 
 
+def searchsorted(a, v):
+    indices = np.flatnonzero(v < a)
+    if len(indices):
+        return indices.min()
+    else:
+        return len(a)
+
+
 def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
     """
     Traverse distance matrix along the diagonals and update the top-k nearest

From f0c022da2fb61b1c9840d59e3a2034222dae65c4 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 13:30:41 -0600
Subject: [PATCH 024/151] Replace numpy.searchsorted with its naive version

---
 tests/naive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 010836639..24ca851c7 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -209,14 +209,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
         for i in iter_range:
             D = distance_matrix[i, i + g]
             if D < P[i, k-1]:
-                idx = np.searchsorted(P[i, :k], D, side='right')
+                idx = searchsorted(P[i, :k], D, side='right')
                 # to keep the top-k, we need to the get rid of the last element.
                 P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                 I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
             if ignore_trivial:  # Self-joins only
                 if D < P[i + g, k-1]:
-                    idx = np.searchsorted(P[i + g, :k], D, side='right')
+                    idx = searchsorted(P[i + g, :k], D, side='right')
                     P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                     I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
 

From 81701ba3620abb480b3852909ffe6fd0b46874ec Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 13:33:35 -0600
Subject: [PATCH 025/151] Fixed calling function searchsorted

---
 tests/naive.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 24ca851c7..a282d49c0 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -157,6 +157,9 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):
 
 
 def searchsorted(a, v):
+    """
+    naive version of numpy.searchsorted(..., side='right')
+    """
     indices = np.flatnonzero(v < a)
     if len(indices):
         return indices.min()
@@ -209,14 +212,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
         for i in iter_range:
             D = distance_matrix[i, i + g]
             if D < P[i, k-1]:
-                idx = searchsorted(P[i, :k], D, side='right')
+                idx = searchsorted(P[i, :k], D)
                 # to keep the top-k, we need to the get rid of the last element.
                 P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                 I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
             if ignore_trivial:  # Self-joins only
                 if D < P[i + g, k-1]:
-                    idx = searchsorted(P[i + g, :k], D, side='right')
+                    idx = searchsorted(P[i + g, :k], D)
                     P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                     I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
 

From e244341a9291119a6f3f48ca07f9b7a11203c545 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 2 May 2022 13:36:51 -0600
Subject: [PATCH 026/151] Fixed format

---
 tests/naive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index a282d49c0..0f70ae7b4 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -200,7 +200,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1)
 
     P = np.full((l, k + 2), np.inf)
-    I = np.full((l, k + 2), -1, dtype=np.int64) # two more columns are to store
+    I = np.full((l, k + 2), -1, dtype=np.int64)  # two more columns are to store
     # ... left and right top-1 matrix profile indices.
 
     for g in diags:
@@ -211,14 +211,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, k=1):
 
         for i in iter_range:
             D = distance_matrix[i, i + g]
-            if D < P[i, k-1]:
+            if D < P[i, k - 1]:
                 idx = searchsorted(P[i, :k], D)
                 # to keep the top-k, we need to the get rid of the last element.
                 P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                 I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
             if ignore_trivial:  # Self-joins only
-                if D < P[i + g, k-1]:
+                if D < P[i + g, k - 1]:
                     idx = searchsorted(P[i + g, :k], D)
                     P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                     I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]

From 1806c66241547cbdd9ac02c0313d16157b5f700e Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 12:13:05 -0600
Subject: [PATCH 027/151] minor changes

---
 tests/naive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 5592af064..3028dd15c 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -240,7 +240,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
                 D = distance_matrix[i, i + g] # D: a single element
                 if D < P[i, k - 1]:
                     idx = searchsorted(P[i, :k], D)
-                    # to keep the top-k, we need to the get rid of the last element.
+                    # to keep the top-k, we must get rid of the last element.
                     P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                     I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 

From ad29c19cc83d6388a1caab1136fdb4fbf82596fb Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 12:14:10 -0600
Subject: [PATCH 028/151] Correct format

---
 tests/naive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 3028dd15c..849c8d080 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -203,11 +203,11 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
             for i in range(l):
                 apply_exclusion_zone(distance_matrix[i], i, exclusion_zone, np.inf)
 
-        for i, D in enumerate(distance_matrix): # D: distance profile
+        for i, D in enumerate(distance_matrix):  # D: distance profile
             # self-join / AB-join: matrix proifle and indices
             indices = np.argsort(D)[:k]
             P[i, :k] = D[indices]
-            indices[P[i,:k] == np.inf] = -1
+            indices[P[i, :k] == np.inf] = -1
             I[i, :k] = indices
 
             # self-join: left matrix profile index (top-1)
@@ -237,7 +237,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
                 iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
 
             for i in iter_range:
-                D = distance_matrix[i, i + g] # D: a single element
+                D = distance_matrix[i, i + g]  # D: a single element
                 if D < P[i, k - 1]:
                     idx = searchsorted(P[i, :k], D)
                     # to keep the top-k, we must get rid of the last element.

From 448d65d69d10c03063c29062cf6c09124281eb78 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 12:35:49 -0600
Subject: [PATCH 029/151] Correct flake8 style

---
 tests/naive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 849c8d080..dacba3075 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -158,7 +158,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):  # pragma: no cover
 
 def searchsorted(a, v):
     """
-    naive version of numpy.searchsorted(..., side='right')
+    Naive version of numpy.searchsorted(..., side='right')
     """
     indices = np.flatnonzero(v < a)
     if len(indices):

From e3ebcb5885085ab25e58ddc98acd8a7bfb7afac0 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 12:46:10 -0600
Subject: [PATCH 030/151] Avoid unnecessary slicing

---
 tests/naive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index dacba3075..67d1fb27c 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -239,14 +239,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
             for i in iter_range:
                 D = distance_matrix[i, i + g]  # D: a single element
                 if D < P[i, k - 1]:
-                    idx = searchsorted(P[i, :k], D)
+                    idx = searchsorted(P[i], D)
                     # to keep the top-k, we must get rid of the last element.
                     P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                     I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
                 if ignore_trivial:  # Self-joins only
                     if D < P[i + g, k - 1]:
-                        idx = searchsorted(P[i + g, :k], D)
+                        idx = searchsorted(P[i + g], D)
                         P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                         I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
 

From 3cee5d85749eaa0987697e10e937fe5db65c9604 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 19:28:08 -0600
Subject: [PATCH 031/151] pass parameter k to function stump

---
 tests/test_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index 783163453..1ce70acc5 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -248,7 +248,7 @@ def test_stump_self_join_KNN(T_A, T_B):
     m = 3
     zone = int(np.ceil(m / 4))
     ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
-    comp_mp = stump(T_B, m, ignore_trivial=True)
+    comp_mp = stump(T_B, m, ignore_trivial=True, k=k)
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)

From a1bc6a4182207f68050da74511d78f46b469b778 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 19:38:44 -0600
Subject: [PATCH 032/151] Add parameter k to function stump

---
 stumpy/stump.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 97334eb5a..115752113 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -433,7 +433,7 @@ def _stump(
 
 
 @core.non_normalized(aamp)
-def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
+def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     """
     Compute the z-normalized matrix profile
 
@@ -467,6 +467,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
+    k : int, default 1
+        The number of smallest elements in distance profile that should be stored
+        for constructing top-k matrix profile
+
     Returns
     -------
     out : numpy.ndarray
@@ -587,7 +591,6 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     l = n_A - m + 1
 
     excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
-    out = np.empty((l, 4), dtype=object)
 
     if ignore_trivial:
         diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
@@ -612,8 +615,9 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         ignore_trivial,
     )
 
-    out[:, 0] = P[:, 0]
-    out[:, 1:] = I
+    out = np.empty((l, 2 * k + 2), dtype=object)
+    out[:, :k] = P[:, :k]
+    out[:, k:] = I
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From 384690cc6492019d66d8b9104a9297c5a0fbcc11 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 20:21:19 -0600
Subject: [PATCH 033/151] Add parameter k to function _stump

---
 stumpy/stump.py | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 115752113..bedd5bf6b 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -235,6 +235,7 @@ def _stump(
     T_B_subseq_isconstant,
     diags,
     ignore_trivial,
+    k,
 ):
     """
     A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel
@@ -294,6 +295,10 @@ def _stump(
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this to
         `False`. Default is `True`.
 
+    k : int
+        The number of smallest elements in distance profile that should be stored
+        for constructing top-k matrix profile.
+
     Returns
     -------
     profile : numpy.ndarray
@@ -353,8 +358,8 @@ def _stump(
     n_B = T_B.shape[0]
     l = n_A - m + 1
     n_threads = numba.config.NUMBA_NUM_THREADS
-    ρ = np.full((n_threads, l, 3), -np.inf, dtype=np.float64)
-    I = np.full((n_threads, l, 3), -1, dtype=np.int64)
+    ρ = np.full((n_threads, l, k + 2), -np.inf, dtype=np.float64)
+    I = np.full((n_threads, l, k + 2), -1, dtype=np.int64)
 
     ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
     diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False)
@@ -406,27 +411,18 @@ def _stump(
     # Reduction of results from all threads
     for thread_idx in range(1, n_threads):
         for i in prange(l):
-            if ρ[0, i, 0] < ρ[thread_idx, i, 0]:
-                ρ[0, i, 0] = ρ[thread_idx, i, 0]
-                I[0, i, 0] = I[thread_idx, i, 0]
-            # left pearson correlation and left matrix profile indices
-            if ρ[0, i, 1] < ρ[thread_idx, i, 1]:
-                ρ[0, i, 1] = ρ[thread_idx, i, 1]
-                I[0, i, 1] = I[thread_idx, i, 1]
-            # right pearson correlation and right matrix profile indices
-            if ρ[0, i, 2] < ρ[thread_idx, i, 2]:
-                ρ[0, i, 2] = ρ[thread_idx, i, 2]
-                I[0, i, 2] = I[thread_idx, i, 2]
+            for j in range(k + 2): # alternative: use mask
+                if ρ[0, i, j] < ρ[thread_idx, i, j]:
+                    ρ[0, i, j] = ρ[thread_idx, i, j]
+                    I[0, i, j] = I[thread_idx, i, j]
 
     # Convert pearson correlations to distances
     p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
     for i in prange(p_norm.shape[0]):
-        if p_norm[i, 0] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 0] = 0.0
-        if p_norm[i, 1] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 1] = 0.0
-        if p_norm[i, 2] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 2] = 0.0
+        for j in range(p_norm.shape[1]): # p_norm.shape[1] is `k + 2`
+            if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
+                p_norm[i, j] = 0.0
+
     P = np.sqrt(p_norm)
 
     return P[:, :], I[0, :, :]
@@ -469,7 +465,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
 
     k : int, default 1
         The number of smallest elements in distance profile that should be stored
-        for constructing top-k matrix profile
+        for constructing top-k matrix profile.
 
     Returns
     -------
@@ -613,6 +609,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         T_B_subseq_isconstant,
         diags,
         ignore_trivial,
+        k,
     )
 
     out = np.empty((l, 2 * k + 2), dtype=object)

From d246736717bac279d87970a8627e3c222d8fefa9 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 20:45:08 -0600
Subject: [PATCH 034/151] Fixed update of top-k rho and indices in _stump

---
 stumpy/stump.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index bedd5bf6b..cc70e76c4 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -411,15 +411,30 @@ def _stump(
     # Reduction of results from all threads
     for thread_idx in range(1, n_threads):
         for i in prange(l):
-            for j in range(k + 2): # alternative: use mask
-                if ρ[0, i, j] < ρ[thread_idx, i, j]:
-                    ρ[0, i, j] = ρ[thread_idx, i, j]
-                    I[0, i, j] = I[thread_idx, i, j]
+            # top-k
+            for j in range(k):
+                if ρ[0, i, k-1] < ρ[thread_idx, i, j]:
+                    idx = k - np.searchsorted(
+                    ρ[0, i, :k][::-1], ρ[thread_idx, i, j]
+                    )
+                    ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1]
+                    ρ[0, i, idx] = ρ[thread_idx, i, j]
+
+                    I[0, i, idx + 1 : k] = I[0, i, idx : k - 1]
+                    I[0, i, idx] = I[thread_idx, i, j]
+
+            if ρ[0, i, k] < ρ[thread_idx, i, k]:
+                ρ[0, i, k] = ρ[thread_idx, i, k]
+                I[0, i, k] = I[thread_idx, i, k]
+
+            if ρ[0, i, k + 1] < ρ[thread_idx, i, k + 1]:
+                ρ[0, i, k + 1] = ρ[thread_idx, i, k + 1]
+                I[0, i, k + 1] = I[thread_idx, i, k + 1]
 
     # Convert pearson correlations to distances
     p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
     for i in prange(p_norm.shape[0]):
-        for j in range(p_norm.shape[1]): # p_norm.shape[1] is `k + 2`
+        for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2`
             if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
                 p_norm[i, j] = 0.0
 

From fdff040c1324fb7c804862a02ee0cf207edad8b4 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 20:59:11 -0600
Subject: [PATCH 035/151] Add parameter k to function _compute_diagonal

---
 stumpy/stump.py | 68 +++++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index cc70e76c4..f0f09e083 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -125,6 +125,10 @@ def _compute_diagonal(
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this to
         `False`. Default is `True`.
 
+    k : int
+        The number of smallest elements in distance profile that should be stored
+        for constructing top-k matrix profile.
+
     Returns
     -------
     None
@@ -154,18 +158,18 @@ def _compute_diagonal(
     constant = (m - 1) * m_inverse * m_inverse  # (m - 1)/(m * m)
 
     for diag_idx in range(diags_start_idx, diags_stop_idx):
-        k = diags[diag_idx]
+        g = diags[diag_idx]
 
-        if k >= 0:
-            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - k))
+        if g >= 0:
+            iter_range = range(0, min(n_A - m + 1, n_B - m + 1 - g))
         else:
-            iter_range = range(-k, min(n_A - m + 1, n_B - m + 1 - k))
+            iter_range = range(-g, min(n_A - m + 1, n_B - m + 1 - g))
 
         for i in iter_range:
-            if i == 0 or (k < 0 and i == -k):
+            if i == 0 or (g < 0 and i == -g):
                 cov = (
                     np.dot(
-                        (T_B[i + k : i + k + m] - M_T[i + k]), (T_A[i : i + m] - μ_Q[i])
+                        (T_B[i + g : i + g + m] - M_T[i + g]), (T_A[i : i + m] - μ_Q[i])
                     )
                     * m_inverse
                 )
@@ -177,38 +181,51 @@ def _compute_diagonal(
                 #     - (T_B[i + k - 1] - M_T_m_1[i + k]) * (T_A[i - 1] - μ_Q_m_1[i])
                 # )
                 cov = cov + constant * (
-                    cov_a[i + k] * cov_b[i] - cov_c[i + k] * cov_d[i]
+                    cov_a[i + g] * cov_b[i] - cov_c[i + g] * cov_d[i]
                 )
 
-            if T_B_subseq_isfinite[i + k] and T_A_subseq_isfinite[i]:
+            if T_B_subseq_isfinite[i + g] and T_A_subseq_isfinite[i]:
                 # Neither subsequence contains NaNs
-                if T_B_subseq_isconstant[i + k] or T_A_subseq_isconstant[i]:
+                if T_B_subseq_isconstant[i + g] or T_A_subseq_isconstant[i]:
                     pearson = 0.5
                 else:
-                    pearson = cov * Σ_T_inverse[i + k] * σ_Q_inverse[i]
+                    pearson = cov * Σ_T_inverse[i + g] * σ_Q_inverse[i]
 
-                if T_B_subseq_isconstant[i + k] and T_A_subseq_isconstant[i]:
+                if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]:
                     pearson = 1.0
 
-                if pearson > ρ[thread_idx, i, 0]:
-                    ρ[thread_idx, i, 0] = pearson
-                    I[thread_idx, i, 0] = i + k
+                if pearson > ρ[thread_idx, i, k - 1]:
+                    idx = k - np.searchsorted(
+                    ρ[thread_idx, i, :k][::-1], pearson
+                    )
+                    ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1]
+                    ρ[thread_idx, i, idx] = pearson
+                    I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1]
+                    I[thread_idx, i, idx] = i + g
 
                 if ignore_trivial:  # self-joins only
-                    if pearson > ρ[thread_idx, i + k, 0]:
-                        ρ[thread_idx, i + k, 0] = pearson
-                        I[thread_idx, i + k, 0] = i
-
-                    if i < i + k:
+                    if pearson > ρ[thread_idx, i + g, k - 1]:
+                        idx = k - np.searchsorted(
+                        ρ[thread_idx, i + g, :k][::-1], pearson
+                        )
+                        ρ[thread_idx, i + g, idx + 1 : k] = ρ[thread_idx, i + g, idx : k - 1]
+                        ρ[thread_idx, i + g, idx] = pearson
+                        I[thread_idx, i + g, idx + 1 : k] = I[thread_idx, i + g, idx : k - 1]
+                        I[thread_idx, i + g, idx] = i
+                        # for top-1 case:
+                        #ρ[thread_idx, i + g, 0] = pearson
+                        #I[thread_idx, i + g, 0] = i
+
+                    if i < i + g:
                         # left pearson correlation and left matrix profile index
-                        if pearson > ρ[thread_idx, i + k, 1]:
-                            ρ[thread_idx, i + k, 1] = pearson
-                            I[thread_idx, i + k, 1] = i
+                        if pearson > ρ[thread_idx, i + g, k]:
+                            ρ[thread_idx, i + g, k] = pearson
+                            I[thread_idx, i + g, k] = i
 
                         # right pearson correlation and right matrix profile index
-                        if pearson > ρ[thread_idx, i, 2]:
-                            ρ[thread_idx, i, 2] = pearson
-                            I[thread_idx, i, 2] = i + k
+                        if pearson > ρ[thread_idx, i, k + 1]:
+                            ρ[thread_idx, i, k + 1] = pearson
+                            I[thread_idx, i, k + 1] = i + g
 
     return
 
@@ -406,6 +423,7 @@ def _stump(
             ρ,
             I,
             ignore_trivial,
+            k,
         )
 
     # Reduction of results from all threads

From 9d721982f4a10d3e01dbe3fdf0403fb33372aec7 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 21:08:13 -0600
Subject: [PATCH 036/151] consider parameter k in non normalized function,
 decorator

---
 stumpy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index a2a30c043..391ce6b57 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -121,7 +121,7 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
         The desired z-normalized/non-normalized function (or class)
     """
     if exclude is None:
-        exclude = ["normalize", "p"]
+        exclude = ["normalize", "p", "k"]
 
     @functools.wraps(non_norm)
     def outer_wrapper(norm):

From 995559ffe6f49aa20ab71f3b33846b3717ce4e1d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 21:11:37 -0600
Subject: [PATCH 037/151] Fixed missing input parameter k in function
 _compute_diagonal

---
 stumpy/stump.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index f0f09e083..45c4e533c 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -42,6 +42,7 @@ def _compute_diagonal(
     ρ,
     I,
     ignore_trivial,
+    k
 ):
     """
     Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I

From a047dd002a93b387f664189ca401405b19fdec4f Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 21:22:13 -0600
Subject: [PATCH 038/151] minor change

---
 stumpy/stump.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 45c4e533c..5f701b9a5 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -648,10 +648,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
 
     out = np.empty((l, 2 * k + 2), dtype=object)
     out[:, :k] = P[:, :k]
-    out[:, k:] = I
+    out[:, k:] = I[:, :]
 
     threshold = 10e-6
-    if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
+    if core.are_distances_too_small(out[:, :k].ravel(), threshold=threshold):  # pragma: no cover
         logger.warning(f"A large number of values are smaller than {threshold}.")
         logger.warning("For a self-join, try setting `ignore_trivial = True`.")
 

From c6370b6da6e438bdd16e4eefffb8a3e4f71a8c93 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 21:50:06 -0600
Subject: [PATCH 039/151] Add verbose

---
 stumpy/stump.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 5f701b9a5..ce5988662 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -646,7 +646,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         k,
     )
 
-    out = np.empty((l, 2 * k + 2), dtype=object)
+    out = np.empty((l, (2 * k) + 2), dtype=object)
+    print(out.shape)
     out[:, :k] = P[:, :k]
     out[:, k:] = I[:, :]
 

From 816441596cbc2d1d85454bdbcba939132d1677b2 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 22:03:28 -0600
Subject: [PATCH 040/151] minor changes

---
 stumpy/stump.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index ce5988662..2ae3046be 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -42,7 +42,7 @@ def _compute_diagonal(
     ρ,
     I,
     ignore_trivial,
-    k
+    k,
 ):
     """
     Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I
@@ -646,13 +646,12 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         k,
     )
 
-    out = np.empty((l, (2 * k) + 2), dtype=object)
-    print(out.shape)
+    out = np.empty((l, 2 * k + 2), dtype=object)
     out[:, :k] = P[:, :k]
-    out[:, k:] = I[:, :]
+    out[:, k:] = I
 
     threshold = 10e-6
-    if core.are_distances_too_small(out[:, :k].ravel(), threshold=threshold):  # pragma: no cover
+    if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
         logger.warning(f"A large number of values are smaller than {threshold}.")
         logger.warning("For a self-join, try setting `ignore_trivial = True`.")
 

From 7007953f700dd41cae95d1ea834d0e5850b245b7 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 22:17:23 -0600
Subject: [PATCH 041/151] Fixed unit test for top-k matrix profile

---
 tests/test_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_stump.py b/tests/test_stump.py
index 1ce70acc5..25b9c5283 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -253,6 +253,6 @@ def test_stump_self_join_KNN(T_A, T_B):
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
 
-    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True)
+    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)

From 5b5f21ada054f9d26780199c34f248f034874fe2 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 22:21:13 -0600
Subject: [PATCH 042/151] Remove parameter k in function non_normalized
 decorator

---
 stumpy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 391ce6b57..a2a30c043 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -121,7 +121,7 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
         The desired z-normalized/non-normalized function (or class)
     """
     if exclude is None:
-        exclude = ["normalize", "p", "k"]
+        exclude = ["normalize", "p"]
 
     @functools.wraps(non_norm)
     def outer_wrapper(norm):

From f7ee854f733eba01412ed17f6a3cdf8f747d842a Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 9 May 2022 22:56:39 -0600
Subject: [PATCH 043/151] Correct format by black

---
 stumpy/stump.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 2ae3046be..eb18b7e8a 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -196,9 +196,7 @@ def _compute_diagonal(
                     pearson = 1.0
 
                 if pearson > ρ[thread_idx, i, k - 1]:
-                    idx = k - np.searchsorted(
-                    ρ[thread_idx, i, :k][::-1], pearson
-                    )
+                    idx = k - np.searchsorted(ρ[thread_idx, i, :k][::-1], pearson)
                     ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1]
                     ρ[thread_idx, i, idx] = pearson
                     I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1]
@@ -207,15 +205,19 @@ def _compute_diagonal(
                 if ignore_trivial:  # self-joins only
                     if pearson > ρ[thread_idx, i + g, k - 1]:
                         idx = k - np.searchsorted(
-                        ρ[thread_idx, i + g, :k][::-1], pearson
+                            ρ[thread_idx, i + g, :k][::-1], pearson
                         )
-                        ρ[thread_idx, i + g, idx + 1 : k] = ρ[thread_idx, i + g, idx : k - 1]
+                        ρ[thread_idx, i + g, idx + 1 : k] = ρ[
+                            thread_idx, i + g, idx : k - 1
+                        ]
                         ρ[thread_idx, i + g, idx] = pearson
-                        I[thread_idx, i + g, idx + 1 : k] = I[thread_idx, i + g, idx : k - 1]
+                        I[thread_idx, i + g, idx + 1 : k] = I[
+                            thread_idx, i + g, idx : k - 1
+                        ]
                         I[thread_idx, i + g, idx] = i
                         # for top-1 case:
-                        #ρ[thread_idx, i + g, 0] = pearson
-                        #I[thread_idx, i + g, 0] = i
+                        # ρ[thread_idx, i + g, 0] = pearson
+                        # I[thread_idx, i + g, 0] = i
 
                     if i < i + g:
                         # left pearson correlation and left matrix profile index
@@ -432,10 +434,8 @@ def _stump(
         for i in prange(l):
             # top-k
             for j in range(k):
-                if ρ[0, i, k-1] < ρ[thread_idx, i, j]:
-                    idx = k - np.searchsorted(
-                    ρ[0, i, :k][::-1], ρ[thread_idx, i, j]
-                    )
+                if ρ[0, i, k - 1] < ρ[thread_idx, i, j]:
+                    idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j])
                     ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1]
                     ρ[0, i, idx] = ρ[thread_idx, i, j]
 
@@ -453,7 +453,7 @@ def _stump(
     # Convert pearson correlations to distances
     p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
     for i in prange(p_norm.shape[0]):
-        for j in prange(p_norm.shape[1]): # p_norm.shape[1] is `k + 2`
+        for j in prange(p_norm.shape[1]):  # p_norm.shape[1] is `k + 2`
             if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
                 p_norm[i, j] = 0.0
 

From 485dba3da38398f27b237142f29adebd870ac003 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 17:45:26 -0600
Subject: [PATCH 044/151] Use separate variables for left and right profiles

---
 stumpy/stump.py | 87 +++++++++++++++++++++++++++++--------------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index eb18b7e8a..9921a5e7c 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -41,6 +41,10 @@ def _compute_diagonal(
     thread_idx,
     ρ,
     I,
+    ρL,
+    IL,
+    ρR,
+    IR,
     ignore_trivial,
     k,
 ):
@@ -221,14 +225,14 @@ def _compute_diagonal(
 
                     if i < i + g:
                         # left pearson correlation and left matrix profile index
-                        if pearson > ρ[thread_idx, i + g, k]:
-                            ρ[thread_idx, i + g, k] = pearson
-                            I[thread_idx, i + g, k] = i
+                        if pearson > ρL[thread_idx, i + g]:
+                            ρL[thread_idx, i + g] = pearson
+                            IL[thread_idx, i + g] = i
 
                         # right pearson correlation and right matrix profile index
-                        if pearson > ρ[thread_idx, i, k + 1]:
-                            ρ[thread_idx, i, k + 1] = pearson
-                            I[thread_idx, i, k + 1] = i + g
+                        if pearson > ρR[thread_idx, i]:
+                            ρR[thread_idx, i] = pearson
+                            IR[thread_idx, i] = i + g
 
     return
 
@@ -378,8 +382,15 @@ def _stump(
     n_B = T_B.shape[0]
     l = n_A - m + 1
     n_threads = numba.config.NUMBA_NUM_THREADS
-    ρ = np.full((n_threads, l, k + 2), -np.inf, dtype=np.float64)
-    I = np.full((n_threads, l, k + 2), -1, dtype=np.int64)
+
+    ρ = np.full((n_threads, l, k), -np.inf, dtype=np.float64)
+    I = np.full((n_threads, l, k), -1, dtype=np.int64)
+
+    ρL = np.full((n_threads, l), -np.inf, dtype=np.float64)
+    IL = np.full((n_threads, l), -1, dtype=np.float64)
+
+    ρR = np.full((n_threads, l), -np.inf, dtype=np.float64)
+    IR = np.full((n_threads, l), -1, dtype=np.float64)
 
     ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
     diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False)
@@ -425,6 +436,10 @@ def _stump(
             thread_idx,
             ρ,
             I,
+            ρL,
+            IL,
+            ρR,
+            IR,
             ignore_trivial,
             k,
         )
@@ -434,7 +449,7 @@ def _stump(
         for i in prange(l):
             # top-k
             for j in range(k):
-                if ρ[0, i, k - 1] < ρ[thread_idx, i, j]:
+                if ρ[0, i, k-1] < ρ[thread_idx, i, j]:
                     idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j])
                     ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1]
                     ρ[0, i, idx] = ρ[thread_idx, i, j]
@@ -442,24 +457,24 @@ def _stump(
                     I[0, i, idx + 1 : k] = I[0, i, idx : k - 1]
                     I[0, i, idx] = I[thread_idx, i, j]
 
-            if ρ[0, i, k] < ρ[thread_idx, i, k]:
-                ρ[0, i, k] = ρ[thread_idx, i, k]
-                I[0, i, k] = I[thread_idx, i, k]
+            if ρL[0, i] < ρL[thread_idx, i]:
+                ρL[0, i] = ρL[thread_idx, i]
+                IL[0, i] = IL[thread_idx, i]
 
-            if ρ[0, i, k + 1] < ρ[thread_idx, i, k + 1]:
-                ρ[0, i, k + 1] = ρ[thread_idx, i, k + 1]
-                I[0, i, k + 1] = I[thread_idx, i, k + 1]
+            if ρR[0, i] < ρR[thread_idx, i]:
+                ρR[0, i] = ρR[thread_idx, i]
+                IR[0, i] = IR[thread_idx, i]
 
     # Convert pearson correlations to distances
     p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
     for i in prange(p_norm.shape[0]):
-        for j in prange(p_norm.shape[1]):  # p_norm.shape[1] is `k + 2`
+        for j in prange(p_norm.shape[1]):
             if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
                 p_norm[i, j] = 0.0
 
     P = np.sqrt(p_norm)
 
-    return P[:, :], I[0, :, :]
+    return P, I[0, :, :], IL[0, :], IR[0, :]
 
 
 @core.non_normalized(aamp)
@@ -627,28 +642,28 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     else:
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
-    P, I = _stump(
-        T_A,
-        T_B,
-        m,
-        M_T,
-        μ_Q,
-        Σ_T_inverse,
-        σ_Q_inverse,
-        M_T_m_1,
-        μ_Q_m_1,
-        T_A_subseq_isfinite,
-        T_B_subseq_isfinite,
-        T_A_subseq_isconstant,
-        T_B_subseq_isconstant,
-        diags,
-        ignore_trivial,
-        k,
+    P, I, IL, IR = _stump(
+    T_A,
+    T_B,
+    m,
+    M_T,
+    μ_Q,
+    Σ_T_inverse,
+    σ_Q_inverse,
+    M_T_m_1,
+    μ_Q_m_1,
+    T_A_subseq_isfinite,
+    T_B_subseq_isfinite,
+    T_A_subseq_isconstant,
+    T_B_subseq_isconstant,
+    diags,
+    ignore_trivial,
+    k,
     )
 
     out = np.empty((l, 2 * k + 2), dtype=object)
-    out[:, :k] = P[:, :k]
-    out[:, k:] = I
+    out[:, :k] = P
+    out[:, k:] = np.c_[I, IL, IR]
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From bc133ca638df71c4542b2351e07297b04b8b6269 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 18:10:06 -0600
Subject: [PATCH 045/151] store top-k rho in ascending order

---
 stumpy/stump.py | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 9921a5e7c..56b2118ca 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -199,26 +199,26 @@ def _compute_diagonal(
                 if T_B_subseq_isconstant[i + g] and T_A_subseq_isconstant[i]:
                     pearson = 1.0
 
-                if pearson > ρ[thread_idx, i, k - 1]:
-                    idx = k - np.searchsorted(ρ[thread_idx, i, :k][::-1], pearson)
-                    ρ[thread_idx, i, idx + 1 : k] = ρ[thread_idx, i, idx : k - 1]
-                    ρ[thread_idx, i, idx] = pearson
-                    I[thread_idx, i, idx + 1 : k] = I[thread_idx, i, idx : k - 1]
-                    I[thread_idx, i, idx] = i + g
+                if pearson > ρ[thread_idx, i, 0]:
+                    idx = np.searchsorted(ρ[thread_idx, i], pearson)
+                    ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1 : idx]
+                    ρ[thread_idx, i, idx - 1] = pearson
+
+                    I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1 : idx]
+                    I[thread_idx, i, idx - 1] = i + g
 
                 if ignore_trivial:  # self-joins only
-                    if pearson > ρ[thread_idx, i + g, k - 1]:
-                        idx = k - np.searchsorted(
-                            ρ[thread_idx, i + g, :k][::-1], pearson
-                        )
-                        ρ[thread_idx, i + g, idx + 1 : k] = ρ[
-                            thread_idx, i + g, idx : k - 1
+                    if pearson > ρ[thread_idx, i + g, 0]:
+                        idx = np.searchsorted(ρ[thread_idx, i + g], pearson)
+                        ρ[thread_idx, i + g, : idx - 1] = ρ[
+                            thread_idx, i + g, 1 : idx
                         ]
-                        ρ[thread_idx, i + g, idx] = pearson
-                        I[thread_idx, i + g, idx + 1 : k] = I[
-                            thread_idx, i + g, idx : k - 1
+                        ρ[thread_idx, i + g, idx - 1] = pearson
+
+                        I[thread_idx, i + g, : idx - 1] = I[
+                            thread_idx, i + g, 1 : idx
                         ]
-                        I[thread_idx, i + g, idx] = i
+                        I[thread_idx, i + g, idx - 1] = i
                         # for top-1 case:
                         # ρ[thread_idx, i + g, 0] = pearson
                         # I[thread_idx, i + g, 0] = i
@@ -449,13 +449,14 @@ def _stump(
         for i in prange(l):
             # top-k
             for j in range(k):
-                if ρ[0, i, k-1] < ρ[thread_idx, i, j]:
-                    idx = k - np.searchsorted(ρ[0, i, :k][::-1], ρ[thread_idx, i, j])
-                    ρ[0, i, idx + 1 : k] = ρ[0, i, idx : k - 1]
-                    ρ[0, i, idx] = ρ[thread_idx, i, j]
+                j = k - 1 - j
+                if ρ[0, i, 0] < ρ[thread_idx, i, j]:
+                    idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j])
+                    ρ[0, i, : idx - 1] = ρ[0, i, 1 : idx]
+                    ρ[0, i, idx - 1] = ρ[thread_idx, i, j]
 
-                    I[0, i, idx + 1 : k] = I[0, i, idx : k - 1]
-                    I[0, i, idx] = I[thread_idx, i, j]
+                    I[0, i, : idx - 1] = I[0, i, 1 : idx]
+                    I[0, i, idx - 1] = I[thread_idx, i, j]
 
             if ρL[0, i] < ρL[thread_idx, i]:
                 ρL[0, i] = ρL[thread_idx, i]
@@ -474,7 +475,7 @@ def _stump(
 
     P = np.sqrt(p_norm)
 
-    return P, I[0, :, :], IL[0, :], IR[0, :]
+    return P[:, ::-1], I[0, :, ::-1], IL[0, :], IR[0, :]
 
 
 @core.non_normalized(aamp)

From 47a61b2f202e3f2864460086ccf92100168b8f1e Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 18:23:08 -0600
Subject: [PATCH 046/151] Revise docstrings

---
 stumpy/stump.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 56b2118ca..bdf8c85b7 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -326,12 +326,16 @@ def _stump(
     Returns
     -------
     profile : numpy.ndarray
-        Matrix profile
+        Top-k Matrix profile
 
     indices : numpy.ndarray
-        The first column consists of the matrix profile indices, the second
-        column consists of the left matrix profile indices, and the third
-        column consists of the right matrix profile indices.
+        The top-k matrix profile indices
+
+    left indices : numpy.ndarray
+        The top-1 left matrix profile indices
+
+    right indices : numpy.ndarray
+        The top-1 right matrix profile indices
 
     Notes
     -----
@@ -520,10 +524,10 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     Returns
     -------
     out : numpy.ndarray
-        The first column consists of the matrix profile, the second column
-        consists of the matrix profile indices, the third column consists of
-        the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices.
+        The first k columns consists of the top-k matrix profile, the next k columns
+        consists of their corresponding matrix profile indices, the one before
+        last column consists of the top-1 left matrix profile indices, and the
+        last column consists of the top-1 right matrix profile indices.
 
     See Also
     --------

From d4dc04a5caea088cd6a9a619830af7c517f5348d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 18:37:27 -0600
Subject: [PATCH 047/151] Correct docstrings

---
 stumpy/stump.py | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index bdf8c85b7..d49296ac5 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -49,9 +49,9 @@ def _compute_diagonal(
     k,
 ):
     """
-    Compute (Numba JIT-compiled) and update the Pearson correlation, ρ, and I
-    sequentially along individual diagonals using a single thread and avoiding race
-    conditions
+    Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation, ρ, and I,
+    and, the left ρ and the left I, the right ρ and the right I sequentially along
+    individual diagonals using a single thread and avoiding race conditions.
 
     Parameters
     ----------
@@ -121,10 +121,22 @@ def _compute_diagonal(
         The thread index
 
     ρ : numpy.ndarray
-        The Pearson correlations
+        The top-k Pearson correlations, sorted in ascending order per row
 
     I : numpy.ndarray
-        The matrix profile indices
+        The top-k matrix profile indices
+
+    ρL : numpy.ndarray
+        The top-1 left Pearson correlations
+
+    IL : numpy.ndarray
+        The top-1 left matrix profile indices
+
+    ρR : numpy.ndarray
+        The top-1 left Pearson correlations
+
+    IR : numpy.ndarray
+        The top-1 right matrix profile indices
 
     ignore_trivial : bool
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this to
@@ -263,8 +275,8 @@ def _stump(
 ):
     """
     A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel
-    computation of the matrix profile, matrix profile indices, left matrix profile
-    indices, and right matrix profile indices.
+    computation of the top-k matrix profile, top-k matrix profile indices, top-1
+    left matrix profile indices, and top-1 right matrix profile indices.
 
     Parameters
     ----------
@@ -326,16 +338,16 @@ def _stump(
     Returns
     -------
     profile : numpy.ndarray
-        Top-k Matrix profile
+        Top-k matrix profile
 
     indices : numpy.ndarray
-        The top-k matrix profile indices
+        Top-k matrix profile indices
 
     left indices : numpy.ndarray
-        The top-1 left matrix profile indices
+        Top-1 left matrix profile indices
 
     right indices : numpy.ndarray
-        The top-1 right matrix profile indices
+        Top-1 right matrix profile indices
 
     Notes
     -----
@@ -417,7 +429,8 @@ def _stump(
     cov_d[:] = cov_d - μ_Q_m_1
 
     for thread_idx in prange(n_threads):
-        # Compute and update cov, I within a single thread to avoiding race conditions
+        # Compute and update pearson correlations and matrix profile indices
+        # within a single thread to avoid race conditions
         _compute_diagonal(
             T_A,
             T_B,

From a123540664c93cacc5cf1b006422b42fba9c9069 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 18:38:47 -0600
Subject: [PATCH 048/151] Correct formats

---
 stumpy/stump.py | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index d49296ac5..f31d0c0f7 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -213,23 +213,19 @@ def _compute_diagonal(
 
                 if pearson > ρ[thread_idx, i, 0]:
                     idx = np.searchsorted(ρ[thread_idx, i], pearson)
-                    ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1 : idx]
+                    ρ[thread_idx, i, : idx - 1] = ρ[thread_idx, i, 1:idx]
                     ρ[thread_idx, i, idx - 1] = pearson
 
-                    I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1 : idx]
+                    I[thread_idx, i, : idx - 1] = I[thread_idx, i, 1:idx]
                     I[thread_idx, i, idx - 1] = i + g
 
                 if ignore_trivial:  # self-joins only
                     if pearson > ρ[thread_idx, i + g, 0]:
                         idx = np.searchsorted(ρ[thread_idx, i + g], pearson)
-                        ρ[thread_idx, i + g, : idx - 1] = ρ[
-                            thread_idx, i + g, 1 : idx
-                        ]
+                        ρ[thread_idx, i + g, : idx - 1] = ρ[thread_idx, i + g, 1:idx]
                         ρ[thread_idx, i + g, idx - 1] = pearson
 
-                        I[thread_idx, i + g, : idx - 1] = I[
-                            thread_idx, i + g, 1 : idx
-                        ]
+                        I[thread_idx, i + g, : idx - 1] = I[thread_idx, i + g, 1:idx]
                         I[thread_idx, i + g, idx - 1] = i
                         # for top-1 case:
                         # ρ[thread_idx, i + g, 0] = pearson
@@ -469,10 +465,10 @@ def _stump(
                 j = k - 1 - j
                 if ρ[0, i, 0] < ρ[thread_idx, i, j]:
                     idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j])
-                    ρ[0, i, : idx - 1] = ρ[0, i, 1 : idx]
+                    ρ[0, i, : idx - 1] = ρ[0, i, 1:idx]
                     ρ[0, i, idx - 1] = ρ[thread_idx, i, j]
 
-                    I[0, i, : idx - 1] = I[0, i, 1 : idx]
+                    I[0, i, : idx - 1] = I[0, i, 1:idx]
                     I[0, i, idx - 1] = I[thread_idx, i, j]
 
             if ρL[0, i] < ρL[thread_idx, i]:
@@ -661,22 +657,22 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
     P, I, IL, IR = _stump(
-    T_A,
-    T_B,
-    m,
-    M_T,
-    μ_Q,
-    Σ_T_inverse,
-    σ_Q_inverse,
-    M_T_m_1,
-    μ_Q_m_1,
-    T_A_subseq_isfinite,
-    T_B_subseq_isfinite,
-    T_A_subseq_isconstant,
-    T_B_subseq_isconstant,
-    diags,
-    ignore_trivial,
-    k,
+        T_A,
+        T_B,
+        m,
+        M_T,
+        μ_Q,
+        Σ_T_inverse,
+        σ_Q_inverse,
+        M_T_m_1,
+        μ_Q_m_1,
+        T_A_subseq_isfinite,
+        T_B_subseq_isfinite,
+        T_A_subseq_isconstant,
+        T_B_subseq_isconstant,
+        diags,
+        ignore_trivial,
+        k,
     )
 
     out = np.empty((l, 2 * k + 2), dtype=object)

From 1dff66f983346ae23430f76cf5c1f16b2c46ea98 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Thu, 12 May 2022 18:40:00 -0600
Subject: [PATCH 049/151] Full coverage of test_stump unit test


From cf48b6961eab3c01180a84a476dcd5e8fcd626ee Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 10:37:44 -0600
Subject: [PATCH 050/151] Change function considering new input/output
 structure

---
 stumpy/scrump.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 002847507..75790c70a 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -609,7 +609,7 @@ def update(self):
         if self._chunk_idx < self._n_chunks:
             start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx]
 
-            P, I = _stump(
+            P, I, IL, IR = _stump(
                 self._T_A,
                 self._T_B,
                 self._m,
@@ -625,8 +625,11 @@ def update(self):
                 self._T_B_subseq_isconstant,
                 self._diags[start_idx:stop_idx],
                 self._ignore_trivial,
+                k=1,
             )
 
+            I = np.c_[I, IL, IR]
+            
             # Update matrix profile and indices
             for i in range(self._P.shape[0]):
                 if self._P[i, 0] > P[i, 0]:

From 7d16ce6a883b38808a7e6f93c41c82755500465a Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 10:45:24 -0600
Subject: [PATCH 051/151] Add two more outputs returned by _stump

---
 stumpy/stump.py | 51 +++++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index f31d0c0f7..348085a4e 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -481,14 +481,26 @@ def _stump(
 
     # Convert pearson correlations to distances
     p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
+    p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
+    p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
+
     for i in prange(p_norm.shape[0]):
         for j in prange(p_norm.shape[1]):
             if p_norm[i, j] < config.STUMPY_P_NORM_THRESHOLD:
                 p_norm[i, j] = 0.0
 
+        if p_norm_L[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_L[i] = 0.0
+
+        if p_norm_R[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_R[i] = 0.0
+
     P = np.sqrt(p_norm)
+    PL = np.sqrt(p_norm_L)
+    PR = np.sqrt(p_norm_R)
+
 
-    return P[:, ::-1], I[0, :, ::-1], IL[0, :], IR[0, :]
+    return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :]
 
 
 @core.non_normalized(aamp)
@@ -656,26 +668,27 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     else:
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
-    P, I, IL, IR = _stump(
-        T_A,
-        T_B,
-        m,
-        M_T,
-        μ_Q,
-        Σ_T_inverse,
-        σ_Q_inverse,
-        M_T_m_1,
-        μ_Q_m_1,
-        T_A_subseq_isfinite,
-        T_B_subseq_isfinite,
-        T_A_subseq_isconstant,
-        T_B_subseq_isconstant,
-        diags,
-        ignore_trivial,
-        k,
+    P, I, PL, IL, PR, IR = _stump(
+                T_A,
+                T_B,
+                m,
+                M_T,
+                μ_Q,
+                Σ_T_inverse,
+                σ_Q_inverse,
+                M_T_m_1,
+                μ_Q_m_1,
+                T_A_subseq_isfinite,
+                T_B_subseq_isfinite,
+                T_A_subseq_isconstant,
+                T_B_subseq_isconstant,
+                diags,
+                ignore_trivial,
+                k,
     )
 
-    out = np.empty((l, 2 * k + 2), dtype=object)
+    out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to
+    # store left and right matrix profile indices
     out[:, :k] = P
     out[:, k:] = np.c_[I, IL, IR]
 

From 61d38b6b747ff96820140335163b5d02c76f0eaf Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 10:50:48 -0600
Subject: [PATCH 052/151] Update/Correct docstrings

---
 stumpy/stump.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 348085a4e..b9743613b 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -133,7 +133,7 @@ def _compute_diagonal(
         The top-1 left matrix profile indices
 
     ρR : numpy.ndarray
-        The top-1 left Pearson correlations
+        The top-1 right Pearson correlations
 
     IR : numpy.ndarray
         The top-1 right matrix profile indices
@@ -272,7 +272,8 @@ def _stump(
     """
     A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel
     computation of the top-k matrix profile, top-k matrix profile indices, top-1
-    left matrix profile indices, and top-1 right matrix profile indices.
+    left matrix profile and matrix profile indices, and top-1 right matrix profile
+    and matrix profile indices.
 
     Parameters
     ----------
@@ -339,9 +340,15 @@ def _stump(
     indices : numpy.ndarray
         Top-k matrix profile indices
 
+    left profile : numpy.ndarray
+        Top-1 left matrix profile
+
     left indices : numpy.ndarray
         Top-1 left matrix profile indices
 
+    right profile : numpy.ndarray
+        Top-1 right matrix profile
+
     right indices : numpy.ndarray
         Top-1 right matrix profile indices
 
@@ -499,7 +506,6 @@ def _stump(
     PL = np.sqrt(p_norm_L)
     PR = np.sqrt(p_norm_R)
 
-
     return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :]
 
 
@@ -546,9 +552,9 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     -------
     out : numpy.ndarray
         The first k columns consists of the top-k matrix profile, the next k columns
-        consists of their corresponding matrix profile indices, the one before
-        last column consists of the top-1 left matrix profile indices, and the
-        last column consists of the top-1 right matrix profile indices.
+        consists of their corresponding matrix profile indices, the column at
+        numpy indexing 2k contains top-1 left matrix profile indices and the last
+        column, at numpy indexing 2k+1, contains top-1 right matrix profile indices.
 
     See Also
     --------

From 1a469a5230720bdc4d86287db174c0196fd9cf8d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 10:53:16 -0600
Subject: [PATCH 053/151] Correct callee function _stump

---
 stumpy/scrump.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 75790c70a..df53d8244 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -609,7 +609,7 @@ def update(self):
         if self._chunk_idx < self._n_chunks:
             start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx]
 
-            P, I, IL, IR = _stump(
+            P, I, PL, IL, PR, IR = _stump(
                 self._T_A,
                 self._T_B,
                 self._m,
@@ -628,8 +628,9 @@ def update(self):
                 k=1,
             )
 
+            P = np.c_[P, PL, PR]
             I = np.c_[I, IL, IR]
-            
+
             # Update matrix profile and indices
             for i in range(self._P.shape[0]):
                 if self._P[i, 0] > P[i, 0]:

From 2149abf0f4d2b0f109246b1a90d1106fa4d76f89 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 10:53:58 -0600
Subject: [PATCH 054/151] Fix format

---
 stumpy/stump.py      | 34 +++++++++++++++++-----------------
 stumpy/test_stump.py |  0
 2 files changed, 17 insertions(+), 17 deletions(-)
 create mode 100644 stumpy/test_stump.py

diff --git a/stumpy/stump.py b/stumpy/stump.py
index b9743613b..cb10e65c4 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -675,25 +675,25 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
     P, I, PL, IL, PR, IR = _stump(
-                T_A,
-                T_B,
-                m,
-                M_T,
-                μ_Q,
-                Σ_T_inverse,
-                σ_Q_inverse,
-                M_T_m_1,
-                μ_Q_m_1,
-                T_A_subseq_isfinite,
-                T_B_subseq_isfinite,
-                T_A_subseq_isconstant,
-                T_B_subseq_isconstant,
-                diags,
-                ignore_trivial,
-                k,
+        T_A,
+        T_B,
+        m,
+        M_T,
+        μ_Q,
+        Σ_T_inverse,
+        σ_Q_inverse,
+        M_T_m_1,
+        μ_Q_m_1,
+        T_A_subseq_isfinite,
+        T_B_subseq_isfinite,
+        T_A_subseq_isconstant,
+        T_B_subseq_isconstant,
+        diags,
+        ignore_trivial,
+        k,
     )
 
-    out = np.empty((l, 2 * k + 2), dtype=object) # last two columns are to
+    out = np.empty((l, 2 * k + 2), dtype=object)  # last two columns are to
     # store left and right matrix profile indices
     out[:, :k] = P
     out[:, k:] = np.c_[I, IL, IR]
diff --git a/stumpy/test_stump.py b/stumpy/test_stump.py
new file mode 100644
index 000000000..e69de29bb

From 364f280d7a4db08ede32151b201e856d344bdef6 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 11:19:02 -0600
Subject: [PATCH 055/151] Fixed number of inputs passed to _stump

---
 stumpy/stumped.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 09557e318..7f1f67e51 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -248,6 +248,7 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
                 T_B_subseq_isconstant_future,
                 diags_futures[i],
                 ignore_trivial,
+                1,
             )
         )
 

From e983e1fbda3ca017d453a2acb97d997314ad9a70 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 11:29:00 -0600
Subject: [PATCH 056/151] Fixed number of outputs returned by the function

---
 stumpy/stumped.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 7f1f67e51..db30eea59 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -253,7 +253,11 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
         )
 
     results = dask_client.gather(futures)
-    profile, indices = results[0]
+    profile, indices, profile_L, indices_L, profile_R, indices_R  = results[0]
+
+    profile = np.c_[profile, profile_L, profile_R]
+    indices = np.c_[indices, indices_L, indices_R]
+
     for i in range(1, len(hosts)):
         P, I = results[i]
         for col in range(P.shape[1]):  # pragma: no cover

From ef2bc6578bfb4f7e04c74dcda3563d32fd76497a Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 11:36:56 -0600
Subject: [PATCH 057/151] Fixed number of returned outputs

---
 stumpy/stumped.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index db30eea59..1fbd7be49 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -259,7 +259,9 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
     indices = np.c_[indices, indices_L, indices_R]
 
     for i in range(1, len(hosts)):
-        P, I = results[i]
+        P, I, PL, IL, PR, IR = results[i]
+        P = np.c_[P, PL, PR]
+        I = np.c_[I, IL, IR]
         for col in range(P.shape[1]):  # pragma: no cover
             cond = P[:, col] < profile[:, col]
             profile[:, col] = np.where(cond, P[:, col], profile[:, col])

From f7d4a8fcd298600c7a51fe8178020a675b349349 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 11:39:31 -0600
Subject: [PATCH 058/151] Correct format

---
 stumpy/stumped.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 1fbd7be49..6ca40707c 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -253,7 +253,7 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
         )
 
     results = dask_client.gather(futures)
-    profile, indices, profile_L, indices_L, profile_R, indices_R  = results[0]
+    profile, indices, profile_L, indices_L, profile_R, indices_R = results[0]
 
     profile = np.c_[profile, profile_L, profile_R]
     indices = np.c_[indices, indices_L, indices_R]

From 3dccc9a244797c3324cfef54a0b3e1d07c36d6e5 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 12:21:00 -0600
Subject: [PATCH 059/151] Exclude parameter 'k' in non-normalized decorator

Once the non-normalized functions are updated to return the top-k matrix
profile, the parameter "k" will be removed from this exclusion list.
---
 stumpy/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index a2a30c043..9c4296ab9 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -121,7 +121,8 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
         The desired z-normalized/non-normalized function (or class)
     """
     if exclude is None:
-        exclude = ["normalize", "p"]
+        exclude = ["normalize", "p", "k"] # remove "k" after updating
+        # non-normalized function to accept "k" for top-k matrix profile 
 
     @functools.wraps(non_norm)
     def outer_wrapper(norm):

From a430364aa2cfc77263f7328386dc5c9ea0048945 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 12:25:17 -0600
Subject: [PATCH 060/151] Correct format

---
 stumpy/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 9c4296ab9..f9a77a07f 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -121,8 +121,8 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
         The desired z-normalized/non-normalized function (or class)
     """
     if exclude is None:
-        exclude = ["normalize", "p", "k"] # remove "k" after updating
-        # non-normalized function to accept "k" for top-k matrix profile 
+        exclude = ["normalize", "p", "k"]  # remove "k" after updating
+        # non-normalized function to accept "k" for top-k matrix profile
 
     @functools.wraps(non_norm)
     def outer_wrapper(norm):

From 4f0194384b38e38a6b76e949d6aac0bd06fa441f Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 15:17:22 -0600
Subject: [PATCH 061/151] Fixed dtype of matrix profile indices

---
 stumpy/stump.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index cb10e65c4..683194e9b 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -406,10 +406,10 @@ def _stump(
     I = np.full((n_threads, l, k), -1, dtype=np.int64)
 
     ρL = np.full((n_threads, l), -np.inf, dtype=np.float64)
-    IL = np.full((n_threads, l), -1, dtype=np.float64)
+    IL = np.full((n_threads, l), -1, dtype=np.int64)
 
     ρR = np.full((n_threads, l), -np.inf, dtype=np.float64)
-    IR = np.full((n_threads, l), -1, dtype=np.float64)
+    IR = np.full((n_threads, l), -1, dtype=np.int64)
 
     ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
     diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False)

From aebe5a31920fed46be8cac8f46c50cbc58315e0c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 16:33:19 -0600
Subject: [PATCH 062/151] Add pragma no cover

---
 tests/naive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/naive.py b/tests/naive.py
index 67d1fb27c..b2d8894f7 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -164,7 +164,7 @@ def searchsorted(a, v):
     if len(indices):
         return indices.min()
     else:
-        return len(a)
+        return len(a)  # pragma: no cover
 
 
 def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):

From de295af807c8b114cdce77ee254e62ed34bcf485 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 13 May 2022 16:37:23 -0600
Subject: [PATCH 063/151] Minor change

---
 tests/naive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index b2d8894f7..4a5ed789a 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -163,8 +163,8 @@ def searchsorted(a, v):
     indices = np.flatnonzero(v < a)
     if len(indices):
         return indices.min()
-    else:
-        return len(a)  # pragma: no cover
+    else:  # pragma: no cover
+        return len(a)
 
 
 def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):

From 1d35aea6326fab28d4d099d1b6e40db7d4fd037c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 15 May 2022 21:48:13 -0600
Subject: [PATCH 064/151] Use range to move in reverse

---
 stumpy/stump.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 683194e9b..6f47fe698 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -468,8 +468,7 @@ def _stump(
     for thread_idx in range(1, n_threads):
         for i in prange(l):
             # top-k
-            for j in range(k):
-                j = k - 1 - j
+            for j in range(k - 1, -1, -1):
                 if ρ[0, i, 0] < ρ[thread_idx, i, j]:
                     idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j])
                     ρ[0, i, : idx - 1] = ρ[0, i, 1:idx]

From 33374167498eee362655c984ae16cedb34552204 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 11:22:50 -0600
Subject: [PATCH 065/151] separate variables for left and right

---
 stumpy/stump.py | 89 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 55 insertions(+), 34 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 97334eb5a..ba8d3a958 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -41,6 +41,10 @@ def _compute_diagonal(
     thread_idx,
     ρ,
     I,
+    ρL,
+    IL,
+    ρR,
+    IR,
     ignore_trivial,
 ):
     """
@@ -190,25 +194,25 @@ def _compute_diagonal(
                 if T_B_subseq_isconstant[i + k] and T_A_subseq_isconstant[i]:
                     pearson = 1.0
 
-                if pearson > ρ[thread_idx, i, 0]:
-                    ρ[thread_idx, i, 0] = pearson
-                    I[thread_idx, i, 0] = i + k
+                if pearson > ρ[thread_idx, i]:
+                    ρ[thread_idx, i] = pearson
+                    I[thread_idx, i] = i + k
 
                 if ignore_trivial:  # self-joins only
-                    if pearson > ρ[thread_idx, i + k, 0]:
-                        ρ[thread_idx, i + k, 0] = pearson
-                        I[thread_idx, i + k, 0] = i
+                    if pearson > ρ[thread_idx, i + k]:
+                        ρ[thread_idx, i + k] = pearson
+                        I[thread_idx, i + k] = i
 
                     if i < i + k:
                         # left pearson correlation and left matrix profile index
-                        if pearson > ρ[thread_idx, i + k, 1]:
-                            ρ[thread_idx, i + k, 1] = pearson
-                            I[thread_idx, i + k, 1] = i
+                        if pearson > ρL[thread_idx, i + k]:
+                            ρL[thread_idx, i + k] = pearson
+                            IL[thread_idx, i + k] = i
 
                         # right pearson correlation and right matrix profile index
-                        if pearson > ρ[thread_idx, i, 2]:
-                            ρ[thread_idx, i, 2] = pearson
-                            I[thread_idx, i, 2] = i + k
+                        if pearson > ρR[thread_idx, i]:
+                            ρR[thread_idx, i] = pearson
+                            IR[thread_idx, i] = i + k
 
     return
 
@@ -353,8 +357,14 @@ def _stump(
     n_B = T_B.shape[0]
     l = n_A - m + 1
     n_threads = numba.config.NUMBA_NUM_THREADS
-    ρ = np.full((n_threads, l, 3), -np.inf, dtype=np.float64)
-    I = np.full((n_threads, l, 3), -1, dtype=np.int64)
+    ρ = np.full((n_threads, l), -np.inf, dtype=np.float64)
+    I = np.full((n_threads, l), -1, dtype=np.int64)
+
+    ρL = np.full((n_threads, l), -np.inf, dtype=np.float64)
+    IL = np.full((n_threads, l), -1, dtype=np.int64)
+
+    ρR = np.full((n_threads, l), -np.inf, dtype=np.float64)
+    IR = np.full((n_threads, l), -1, dtype=np.int64)
 
     ndist_counts = core._count_diagonal_ndist(diags, m, n_A, n_B)
     diags_ranges = core._get_array_ranges(ndist_counts, n_threads, False)
@@ -400,36 +410,47 @@ def _stump(
             thread_idx,
             ρ,
             I,
+            ρL,
+            IL,
+            ρR,
+            IR,
             ignore_trivial,
         )
 
     # Reduction of results from all threads
     for thread_idx in range(1, n_threads):
         for i in prange(l):
-            if ρ[0, i, 0] < ρ[thread_idx, i, 0]:
-                ρ[0, i, 0] = ρ[thread_idx, i, 0]
-                I[0, i, 0] = I[thread_idx, i, 0]
+            if ρ[0, i] < ρ[thread_idx, i]:
+                ρ[0, i] = ρ[thread_idx, i]
+                I[0, i] = I[thread_idx, i]
             # left pearson correlation and left matrix profile indices
-            if ρ[0, i, 1] < ρ[thread_idx, i, 1]:
-                ρ[0, i, 1] = ρ[thread_idx, i, 1]
-                I[0, i, 1] = I[thread_idx, i, 1]
+            if ρL[0, i] < ρL[thread_idx, i]:
+                ρL[0, i] = ρL[thread_idx, i]
+                IL[0, i] = IL[thread_idx, i]
             # right pearson correlation and right matrix profile indices
-            if ρ[0, i, 2] < ρ[thread_idx, i, 2]:
-                ρ[0, i, 2] = ρ[thread_idx, i, 2]
-                I[0, i, 2] = I[thread_idx, i, 2]
+            if ρR[0, i] < ρR[thread_idx, i]:
+                ρR[0, i] = ρR[thread_idx, i]
+                IR[0, i] = IR[thread_idx, i]
 
     # Convert pearson correlations to distances
-    p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
+    p_norm = np.abs(2 * m * (1 - ρ[0, :]))
+    p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
+    p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
+
     for i in prange(p_norm.shape[0]):
-        if p_norm[i, 0] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 0] = 0.0
-        if p_norm[i, 1] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 1] = 0.0
-        if p_norm[i, 2] < config.STUMPY_P_NORM_THRESHOLD:
-            p_norm[i, 2] = 0.0
+        if p_norm[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm[i] = 0.0
+        if p_norm_L[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_L[i] = 0.0
+        if p_norm_R[i] < config.STUMPY_P_NORM_THRESHOLD:
+            p_norm_R[i] = 0.0
+
     P = np.sqrt(p_norm)
+    PL = np.sqrt(p_norm_L)
+    PR = np.sqrt(p_norm_R)
+
 
-    return P[:, :], I[0, :, :]
+    return P, I, PL, IL, PR, IR
 
 
 @core.non_normalized(aamp)
@@ -594,7 +615,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     else:
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
-    P, I = _stump(
+    P, I, PL, IL, PR, IR = _stump(
         T_A,
         T_B,
         m,
@@ -612,8 +633,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         ignore_trivial,
     )
 
-    out[:, 0] = P[:, 0]
-    out[:, 1:] = I
+    out[:, 0] = P
+    out[:, 1:] = np.c_[I, IL, IR]
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From d09ea2359fcf5f355203abd34bc28a1a86ed5129 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 15:39:36 -0600
Subject: [PATCH 066/151] use separate variables for left and right

---
 stumpy/stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index ba8d3a958..6972bff87 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -634,7 +634,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     )
 
     out[:, 0] = P
-    out[:, 1:] = np.c_[I, IL, IR]
+    out[:, 1:] = np.column_stack((I, IL, IR))
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From dbd7b8c2ed36d92d6c9c88f7200f0799ce8e4ea6 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 16:38:03 -0600
Subject: [PATCH 067/151] replace numpy column_stack with c_

---
 stumpy/stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 6972bff87..ba8d3a958 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -634,7 +634,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     )
 
     out[:, 0] = P
-    out[:, 1:] = np.column_stack((I, IL, IR))
+    out[:, 1:] = np.c_[I, IL, IR]
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From 56ab764228da8ee1b828d1771acc4c95fccaf239 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 16:51:29 -0600
Subject: [PATCH 068/151] Fix bug

---
 stumpy/stump.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index ba8d3a958..121988d25 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -357,6 +357,7 @@ def _stump(
     n_B = T_B.shape[0]
     l = n_A - m + 1
     n_threads = numba.config.NUMBA_NUM_THREADS
+
     ρ = np.full((n_threads, l), -np.inf, dtype=np.float64)
     I = np.full((n_threads, l), -1, dtype=np.int64)
 
@@ -450,7 +451,7 @@ def _stump(
     PR = np.sqrt(p_norm_R)
 
 
-    return P, I, PL, IL, PR, IR
+    return P, I[0, :], PL, IL[0, :], PR, IR[0, :]
 
 
 @core.non_normalized(aamp)
@@ -633,9 +634,11 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
         ignore_trivial,
     )
 
+
     out[:, 0] = P
     out[:, 1:] = np.c_[I, IL, IR]
 
+
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
         logger.warning(f"A large number of values are smaller than {threshold}.")

From e817e5f0dd1316105b93a96d9be28b659a58367d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 21:02:08 -0600
Subject: [PATCH 069/151] Remove a wrongly created file

---
 stumpy/test_stump.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 stumpy/test_stump.py

diff --git a/stumpy/test_stump.py b/stumpy/test_stump.py
deleted file mode 100644
index e69de29bb..000000000

From c1e39256972a03f0ee1b014e1b8e20efa2d811ba Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 21:04:59 -0600
Subject: [PATCH 070/151] Remove parameter k in non normalized decorator

---
 stumpy/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index f9a77a07f..753b0affa 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -121,9 +121,9 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
         The desired z-normalized/non-normalized function (or class)
     """
     if exclude is None:
-        exclude = ["normalize", "p", "k"]  # remove "k" after updating
-        # non-normalized function to accept "k" for top-k matrix profile
+        exclude = ["normalize", "p"]
 
+    
     @functools.wraps(non_norm)
     def outer_wrapper(norm):
         @functools.wraps(norm)

From aa08176e4cc1ecd90dc47e3ef851103088136a11 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 21:09:58 -0600
Subject: [PATCH 071/151] Add parameter k to arguments of non normalized
 function

Temporarily, the parameter k is added to the arguments of the non-normalized
function `aamp` so that the tests pass for now. This will be revisited
after completing the normalized version, `stump`.
---
 stumpy/aamp.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/aamp.py b/stumpy/aamp.py
index 201e4413b..b00c8cbf1 100644
--- a/stumpy/aamp.py
+++ b/stumpy/aamp.py
@@ -240,7 +240,8 @@ def _aamp(
     return np.power(P[0, :, :], 1.0 / p), I[0, :, :]
 
 
-def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0):
+def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary
+    # and this function needs to be changed to return top-k
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
 

From 37a9f2c91979fbd2db35d27d4c946eb1ca31c08f Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 22:12:34 -0600
Subject: [PATCH 072/151] Replace numpy c_ with column_stack

---
 stumpy/scrump.py  | 4 ++--
 stumpy/stump.py   | 2 +-
 stumpy/stumped.py | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index df53d8244..25c4e4e3f 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -628,8 +628,8 @@ def update(self):
                 k=1,
             )
 
-            P = np.c_[P, PL, PR]
-            I = np.c_[I, IL, IR]
+            P = np.column_stack((P, PL, PR))
+            I = np.column_stack((I, IL, IR))
 
             # Update matrix profile and indices
             for i in range(self._P.shape[0]):
diff --git a/stumpy/stump.py b/stumpy/stump.py
index 6f47fe698..449c35200 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -695,7 +695,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     out = np.empty((l, 2 * k + 2), dtype=object)  # last two columns are to
     # store left and right matrix profile indices
     out[:, :k] = P
-    out[:, k:] = np.c_[I, IL, IR]
+    out[:, k:] = np.column_stack((I, IL, IR))
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 6ca40707c..0c1c34e07 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -255,13 +255,13 @@ def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True,
     results = dask_client.gather(futures)
     profile, indices, profile_L, indices_L, profile_R, indices_R = results[0]
 
-    profile = np.c_[profile, profile_L, profile_R]
-    indices = np.c_[indices, indices_L, indices_R]
+    profile = np.column_stack((profile, profile_L, profile_R))
+    indices = np.column_stack((indices, indices_L, indices_R))
 
     for i in range(1, len(hosts)):
         P, I, PL, IL, PR, IR = results[i]
-        P = np.c_[P, PL, PR]
-        I = np.c_[I, IL, IR]
+        P = np.column_stack((P, PL, PR))
+        I = np.column_stack((I, IL, IR))
         for col in range(P.shape[1]):  # pragma: no cover
             cond = P[:, col] < profile[:, col]
             profile[:, col] = np.where(cond, P[:, col], profile[:, col])

From 8c0e76ecd2eeea875fb7c80eb4cfe5703740a333 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 23:29:21 -0600
Subject: [PATCH 073/151] Minor changes

- Improve docstrings
- Reverse rho and I before return
- Improve comments
---
 stumpy/stump.py | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 449c35200..6fe2b7e41 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -49,9 +49,9 @@ def _compute_diagonal(
     k,
 ):
     """
-    Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation, ρ, and I,
-    and, the left ρ and the left I, the right ρ and the right I sequentially along
-    individual diagonals using a single thread and avoiding race conditions.
+    Compute (Numba JIT-compiled) and update the (top-k) Pearson correlation (ρ),
+    ρL, ρR, I, IL, and IR sequentially along individual diagonals using a single
+    thread and avoiding race conditions.
 
     Parameters
     ----------
@@ -121,10 +121,10 @@ def _compute_diagonal(
         The thread index
 
     ρ : numpy.ndarray
-        The top-k Pearson correlations, sorted in ascending order per row
+        The (top-k) Pearson correlations, sorted in ascending order per row
 
     I : numpy.ndarray
-        The top-k matrix profile indices
+        The (top-k) matrix profile indices
 
     ρL : numpy.ndarray
         The top-1 left Pearson correlations
@@ -144,7 +144,7 @@ def _compute_diagonal(
 
     k : int
         The number of smallest elements in distance profile that should be stored
-        for constructing top-k matrix profile.
+        for constructing the top-k matrix profile.
 
     Returns
     -------
@@ -227,9 +227,6 @@ def _compute_diagonal(
 
                         I[thread_idx, i + g, : idx - 1] = I[thread_idx, i + g, 1:idx]
                         I[thread_idx, i + g, idx - 1] = i
-                        # for top-1 case:
-                        # ρ[thread_idx, i + g, 0] = pearson
-                        # I[thread_idx, i + g, 0] = i
 
                     if i < i + g:
                         # left pearson correlation and left matrix profile index
@@ -271,9 +268,9 @@ def _stump(
 ):
     """
     A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel
-    computation of the top-k matrix profile, top-k matrix profile indices, top-1
-    left matrix profile and matrix profile indices, and top-1 right matrix profile
-    and matrix profile indices.
+    computation of the (top-k) matrix profile, the (top-k) matrix profile indices,
+    the top-1 left matrix profile and matrix profile indices, and the top-1 right
+    matrix profile and matrix profile indices.
 
     Parameters
     ----------
@@ -468,7 +465,7 @@ def _stump(
     for thread_idx in range(1, n_threads):
         for i in prange(l):
             # top-k
-            for j in range(k - 1, -1, -1):
+            for j in range(k - 1, -1, -1): # reverse iteration to preserve order in ties
                 if ρ[0, i, 0] < ρ[thread_idx, i, j]:
                     idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j])
                     ρ[0, i, : idx - 1] = ρ[0, i, 1:idx]
@@ -485,8 +482,12 @@ def _stump(
                 ρR[0, i] = ρR[thread_idx, i]
                 IR[0, i] = IR[thread_idx, i]
 
-    # Convert pearson correlations to distances
-    p_norm = np.abs(2 * m * (1 - ρ[0, :, :]))
+    # The arrays ρ (and so I) should be reversed since ρ is in ascending order.
+    ρ = ρ[0, :, ::-1]
+    I = I[0, :, ::-1]
+
+    # Convert pearson correlations to distances.
+    p_norm = np.abs(2 * m * (1 -  ρ))
     p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
     p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
 
@@ -505,7 +506,7 @@ def _stump(
     PL = np.sqrt(p_norm_L)
     PR = np.sqrt(p_norm_R)
 
-    return P[:, ::-1], I[0, :, ::-1], PL, IL[0, :], PR, IR[0, :]
+    return P, I, PL, IL[0, :], PR, IR[0, :]
 
 
 @core.non_normalized(aamp)
@@ -514,8 +515,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     Compute the z-normalized matrix profile
 
     This is a convenience wrapper around the Numba JIT-compiled parallelized
-    `_stump` function which computes the matrix profile according to STOMPopt with
-    Pearson correlations.
+    `_stump` function which computes the (top-k) matrix profile according to
+    STOMPopt with Pearson correlations.
 
     Parameters
     ----------
@@ -545,15 +546,15 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
 
     k : int, default 1
         The number of smallest elements in distance profile that should be stored
-        for constructing top-k matrix profile.
+        for constructing the top-k matrix profile.
 
     Returns
     -------
     out : numpy.ndarray
-        The first k columns consists of the top-k matrix profile, the next k columns
-        consists of their corresponding matrix profile indices, the column at
-        numpy indexing 2k contains top-1 left matrix profile indices and the last
-        column, at numpy indexing 2k+1, contains top-1 right matrix profile indices.
+        The first k columns contain the top-k matrix profile, the next k columns
+        contain their corresponding matrix profile indices, the column at
+        numpy indexing 2k contains the top-1 left matrix profile indices and the last
+        column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices.
 
     See Also
     --------

From df4c5d1ad8db3109eb8316c99314785cb02f5325 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 17 May 2022 23:47:54 -0600
Subject: [PATCH 074/151] Correct Format

---
 stumpy/aamp.py  | 2 +-
 stumpy/core.py  | 1 -
 stumpy/stump.py | 6 ++++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/stumpy/aamp.py b/stumpy/aamp.py
index b00c8cbf1..807c3164b 100644
--- a/stumpy/aamp.py
+++ b/stumpy/aamp.py
@@ -240,7 +240,7 @@ def _aamp(
     return np.power(P[0, :, :], 1.0 / p), I[0, :, :]
 
 
-def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1): # k=1 is temporary
+def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):  # k=1 is temporary
     # and this function needs to be changed to return top-k
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
diff --git a/stumpy/core.py b/stumpy/core.py
index 753b0affa..a2a30c043 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -123,7 +123,6 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0):
     if exclude is None:
         exclude = ["normalize", "p"]
 
-    
     @functools.wraps(non_norm)
     def outer_wrapper(norm):
         @functools.wraps(norm)
diff --git a/stumpy/stump.py b/stumpy/stump.py
index 6fe2b7e41..3e241a11e 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -465,7 +465,9 @@ def _stump(
     for thread_idx in range(1, n_threads):
         for i in prange(l):
             # top-k
-            for j in range(k - 1, -1, -1): # reverse iteration to preserve order in ties
+            for j in range(
+                k - 1, -1, -1
+            ):  # reverse iteration to preserve order in ties
                 if ρ[0, i, 0] < ρ[thread_idx, i, j]:
                     idx = np.searchsorted(ρ[0, i], ρ[thread_idx, i, j])
                     ρ[0, i, : idx - 1] = ρ[0, i, 1:idx]
@@ -487,7 +489,7 @@ def _stump(
     I = I[0, :, ::-1]
 
     # Convert pearson correlations to distances.
-    p_norm = np.abs(2 * m * (1 -  ρ))
+    p_norm = np.abs(2 * m * (1 - ρ))
     p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
     p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
 

From c5c881bebc2ebffb9d55a1491ebff6f239b73553 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 00:22:26 -0600
Subject: [PATCH 075/151] minor improvement of docstring

---
 stumpy/stump.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 3e241a11e..ae6a21a15 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -553,8 +553,8 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     Returns
     -------
     out : numpy.ndarray
-        The first k columns contain the top-k matrix profile, the next k columns
-        contain their corresponding matrix profile indices, the column at
+        The first k columns consist of the top-k matrix profile, the next k columns
+        consist of their corresponding matrix profile indices, the column at
         numpy indexing 2k contains the top-1 left matrix profile indices and the last
         column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices.
 

From d9dcdc037168ef4f7cd4a9ef4cda491a94f24495 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 00:24:49 -0600
Subject: [PATCH 076/151] Add parameter k to the arguments of function

the function will be revised to return top-k matrix profile
---
 stumpy/aamped.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/aamped.py b/stumpy/aamped.py
index d6bf6d97b..c158c9423 100644
--- a/stumpy/aamped.py
+++ b/stumpy/aamped.py
@@ -12,7 +12,8 @@
 logger = logging.getLogger(__name__)
 
 
-def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0):
+def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
+    # function needs to be revised to return top-k matix profile
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
 

From c6b81f0410769cd700cf68dbbb8f473dd50bfabf Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 00:52:31 -0600
Subject: [PATCH 077/151] Add parameter k to arguments

Temporarily add parameter k to avoid non-normalized decorator test failure
---
 stumpy/stumped.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 0c1c34e07..a48f6a957 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -14,7 +14,8 @@
 
 
 @core.non_normalized(aamped)
-def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
+def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
+    # the function needs to be revisd to return top-k matrix profile
     """
     Compute the z-normalized matrix profile with a distributed dask cluster
 

From 4ffc7fca9733cccb6dddab528a1a5d2ca996089c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 00:53:55 -0600
Subject: [PATCH 078/151] Correct format

---
 stumpy/stumped.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index a48f6a957..e922536f3 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -14,7 +14,9 @@
 
 
 @core.non_normalized(aamped)
-def stumped(dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
+def stumped(
+    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
+):
     # the function needs to be revisd to return top-k matrix profile
     """
     Compute the z-normalized matrix profile with a distributed dask cluster

From 102c627f64eb5736f528cc31bba8bb01f8645628 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 02:03:34 -0600
Subject: [PATCH 079/151] Remove parameter k from arguments

---
 stumpy/aamped.py  | 3 +--
 stumpy/stumped.py | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/stumpy/aamped.py b/stumpy/aamped.py
index c158c9423..d6bf6d97b 100644
--- a/stumpy/aamped.py
+++ b/stumpy/aamped.py
@@ -12,8 +12,7 @@
 logger = logging.getLogger(__name__)
 
 
-def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
-    # function needs to be revised to return top-k matix profile
+def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0):
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
 
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index e922536f3..6cdfc5aed 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -15,9 +15,7 @@
 
 @core.non_normalized(aamped)
 def stumped(
-    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
-):
-    # the function needs to be revisd to return top-k matrix profile
+    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
     """
     Compute the z-normalized matrix profile with a distributed dask cluster
 

From a37f793306d54123af0660c428ec845a880b3930 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 02:24:53 -0600
Subject: [PATCH 080/151] Add one new unit test

---
 tests/test_stumped.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_stumped.py b/tests/test_stumped.py
index ca53829fc..02e914436 100644
--- a/tests/test_stumped.py
+++ b/tests/test_stumped.py
@@ -608,3 +608,20 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap(
         naive.replace_inf(ref_mp)
         naive.replace_inf(comp_mp)
         npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore:numpy.dtype size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")
+@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stumped_self_join_KNN(T_A, T_B, dask_cluster):
+    with Client(dask_cluster) as dask_client:
+        k = 3
+        m = 3
+        zone = int(np.ceil(m / 4))
+        ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+        comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)

From 0755af4ddcdf5ad5a331a1b535af53f879dfc160 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 11:31:12 -0600
Subject: [PATCH 081/151] Add parameter k=1 to arguments

This is to avoid unit test failure in non-normalized decorator. After
finalizing the normalized function, the non normalized functions will
be revised to return top-k matrix profile.
---
 stumpy/aamped.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/aamped.py b/stumpy/aamped.py
index d6bf6d97b..d833ee8b3 100644
--- a/stumpy/aamped.py
+++ b/stumpy/aamped.py
@@ -12,7 +12,8 @@
 logger = logging.getLogger(__name__)
 
 
-def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0):
+def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
+    # function needs to be revised to return top-k matrix profile
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
 

From ca9fdcffcf94d5f0541b74e845fc5e11ee9481ae Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 14:16:06 -0600
Subject: [PATCH 082/151] Revise stumped to return top-k matrix profile

---
 stumpy/stumped.py | 50 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 6cdfc5aed..2b826ba71 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -15,7 +15,7 @@
 
 @core.non_normalized(aamped)
 def stumped(
-    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0):
+    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     """
     Compute the z-normalized matrix profile with a distributed dask cluster
 
@@ -55,6 +55,10 @@ def stumped(
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
+    k : int
+        The number of smallest elements in distance profile that should be stored
+        for constructing the top-k matrix profile.
+
     Returns
     -------
     out : numpy.ndarray
@@ -184,7 +188,6 @@ def stumped(
     l = n_A - m + 1
 
     excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
-    out = np.empty((l, 4), dtype=object)
 
     hosts = list(dask_client.ncores().keys())
     nworkers = len(hosts)
@@ -249,27 +252,44 @@ def stumped(
                 T_B_subseq_isconstant_future,
                 diags_futures[i],
                 ignore_trivial,
-                1,
+                k,
             )
         )
 
     results = dask_client.gather(futures)
     profile, indices, profile_L, indices_L, profile_R, indices_R = results[0]
 
-    profile = np.column_stack((profile, profile_L, profile_R))
-    indices = np.column_stack((indices, indices_L, indices_R))
-
     for i in range(1, len(hosts)):
         P, I, PL, IL, PR, IR = results[i]
-        P = np.column_stack((P, PL, PR))
-        I = np.column_stack((I, IL, IR))
-        for col in range(P.shape[1]):  # pragma: no cover
-            cond = P[:, col] < profile[:, col]
-            profile[:, col] = np.where(cond, P[:, col], profile[:, col])
-            indices[:, col] = np.where(cond, I[:, col], indices[:, col])
-
-    out[:, 0] = profile[:, 0]
-    out[:, 1:4] = indices
+        # Update top-k matrix profile, alternative approach:
+        # np.argsort(np.concatenate(profile, P), kind='mergesort')
+        prof = profile.copy()
+        ind = indices.copy()
+        for j in range(l):
+            u, w = 0, 0
+            for idx in range(k):
+                if prof[j, u] <= P[j, w]:
+                    profile[j, idx] = prof[j, u]
+                    indices[j, idx] = ind[j, u]
+                    u += 1
+                else:
+                    profile[j, idx] = P[j, w]
+                    indices[j, idx] = I[j, w]
+                    w += 1
+
+        # Update top-1 left matrix profile and matrix profile index
+        cond = PL < profile_L
+        profile_L = np.where(cond, PL, profile_L)
+        indices_L = np.where(cond, IL, indices_L)
+
+        # Update top-1 right matrix profile and matrix profile index
+        cond = PR < profile_R
+        profile_R = np.where(cond, PR, profile_R)
+        indices_R = np.where(cond, IR, indices_R)
+
+    out = np.empty((l, 2 * k + 2), dtype=object)
+    out[:, :k] = profile
+    out[:, k:] = np.column_stack((indices, indices_L, indices_R))
 
     # Delete data from Dask cluster
     dask_client.cancel(T_A_future)

From 9408631f397ef6578dc2d21205e44bb5a45c38f6 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 14:16:56 -0600
Subject: [PATCH 083/151] Correct format

---
 stumpy/stumped.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 2b826ba71..037c4ba52 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -15,7 +15,8 @@
 
 @core.non_normalized(aamped)
 def stumped(
-    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
+    dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
+):
     """
     Compute the z-normalized matrix profile with a distributed dask cluster
 

From 435d9b88ed52bfd8800ff5055375661287b3871d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 16:19:44 -0600
Subject: [PATCH 084/151] several minor changes

---
 stumpy/aamp.py      |  4 ++--
 stumpy/scrump.py    |  2 +-
 stumpy/stump.py     | 16 ++++++++--------
 stumpy/stumped.py   | 12 ++++++------
 tests/test_stump.py |  2 +-
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/stumpy/aamp.py b/stumpy/aamp.py
index 807c3164b..87568f365 100644
--- a/stumpy/aamp.py
+++ b/stumpy/aamp.py
@@ -240,8 +240,8 @@ def _aamp(
     return np.power(P[0, :, :], 1.0 / p), I[0, :, :]
 
 
-def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):  # k=1 is temporary
-    # and this function needs to be changed to return top-k
+def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
+    # function needs to be changed to return top-k matrix profile
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile
 
diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 25c4e4e3f..9b26478c2 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -454,6 +454,7 @@ def __init__(
         s=None,
         normalize=True,
         p=2.0,
+        k=1, # class needs to be revised to return (top-k) matrix profile
     ):
         """
         Initialize the `scrump` object
@@ -625,7 +626,6 @@ def update(self):
                 self._T_B_subseq_isconstant,
                 self._diags[start_idx:stop_idx],
                 self._ignore_trivial,
-                k=1,
             )
 
             P = np.column_stack((P, PL, PR))
diff --git a/stumpy/stump.py b/stumpy/stump.py
index ae6a21a15..60d965590 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -269,8 +269,8 @@ def _stump(
     """
     A Numba JIT-compiled version of STOMPopt with Pearson correlations for parallel
     computation of the (top-k) matrix profile, the (top-k) matrix profile indices,
-    the top-1 left matrix profile and matrix profile indices, and the top-1 right
-    matrix profile and matrix profile indices.
+    the top-1 left matrix profile and its matrix profile index, and the top-1 right
+    matrix profile and its matrix profile index.
 
     Parameters
     ----------
@@ -327,7 +327,7 @@ def _stump(
 
     k : int
         The number of smallest elements in distance profile that should be stored
-        for constructing top-k matrix profile.
+        for constructing the top-k matrix profile.
 
     Returns
     -------
@@ -430,7 +430,7 @@ def _stump(
 
     for thread_idx in prange(n_threads):
         # Compute and update pearson correlations and matrix profile indices
-        # within a single thread to avoid race conditions
+        # within a single thread and avoiding race conditions
         _compute_diagonal(
             T_A,
             T_B,
@@ -484,12 +484,12 @@ def _stump(
                 ρR[0, i] = ρR[thread_idx, i]
                 IR[0, i] = IR[thread_idx, i]
 
-    # The arrays ρ (and so I) should be reversed since ρ is in ascending order.
-    ρ = ρ[0, :, ::-1]
+    # Convert top-k pearson correlations to distances. The arrays ρ (and so I) should
+    # be reversed since ρ is in ascending order.
+    p_norm = np.abs(2 * m * (1 - ρ[0, :, ::-1]))
     I = I[0, :, ::-1]
 
-    # Convert pearson correlations to distances.
-    p_norm = np.abs(2 * m * (1 - ρ))
+    # Convert top-1 left/right pearson correlations to distances.
     p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
     p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
 
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 037c4ba52..9aa815e6e 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -18,10 +18,10 @@ def stumped(
     dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
 ):
     """
-    Compute the z-normalized matrix profile with a distributed dask cluster
+    Compute the z-normalized (top-k) matrix profile with a distributed dask cluster
 
     This is a highly distributed implementation around the Numba JIT-compiled
-    parallelized `_stump` function which computes the matrix profile according
+    parallelized `_stump` function which computes the (top-k) matrix profile according
     to STOMPopt with Pearson correlations.
 
     Parameters
@@ -63,10 +63,10 @@ def stumped(
     Returns
     -------
     out : numpy.ndarray
-        The first column consists of the matrix profile, the second column
-        consists of the matrix profile indices, the third column consists of
-        the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices.
+        The first k columns consist of the top-k matrix profile, the next k columns
+        consist of their corresponding matrix profile indices, the column at
+        numpy indexing 2k contains the top-1 left matrix profile indices and the last
+        column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices.
 
     See Also
     --------
diff --git a/tests/test_stump.py b/tests/test_stump.py
index 25b9c5283..af2a2315e 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -244,7 +244,7 @@ def test_stump_nan_zero_mean_self_join():
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join_KNN(T_A, T_B):
-    k = 2
+    k = 3
     m = 3
     zone = int(np.ceil(m / 4))
     ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)

From c6580c8a8dc1d2cdc49ca4724c16d0649ed95028 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 16:20:45 -0600
Subject: [PATCH 085/151] Correct Format

---
 stumpy/scrump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 9b26478c2..6a4f7b534 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -454,7 +454,7 @@ def __init__(
         s=None,
         normalize=True,
         p=2.0,
-        k=1, # class needs to be revised to return (top-k) matrix profile
+        k=1,  # class needs to be revised to return (top-k) matrix profile
     ):
         """
         Initialize the `scrump` object

From e4b0473e0fa38f696a84aac3f2da9938eaeb198d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 16:26:25 -0600
Subject: [PATCH 086/151] Remove k from arguments

---
 stumpy/scrump.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 6a4f7b534..e62658fc9 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -454,7 +454,6 @@ def __init__(
         s=None,
         normalize=True,
         p=2.0,
-        k=1,  # class needs to be revised to return (top-k) matrix profile
     ):
         """
         Initialize the `scrump` object

From 8bf05ee3d7534488f0769ff2b4bf95eb1f818fc7 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 16:33:27 -0600
Subject: [PATCH 087/151] Pass 1 as value of parameter k to a class method to
 avoid unit test failure

---
 stumpy/scrump.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index e62658fc9..ea8808696 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -625,6 +625,7 @@ def update(self):
                 self._T_B_subseq_isconstant,
                 self._diags[start_idx:stop_idx],
                 self._ignore_trivial,
+                1 # revise module to accept parameter k for top-k matrix profile
             )
 
             P = np.column_stack((P, PL, PR))

From f12261cafd9637a1253444d0c321f61c8ee59b23 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 16:34:42 -0600
Subject: [PATCH 088/151] Pass 1 as the value of parameter k to avoid unit test
 failure

---
 stumpy/scrump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index ea8808696..9fcb51e4b 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -625,7 +625,7 @@ def update(self):
                 self._T_B_subseq_isconstant,
                 self._diags[start_idx:stop_idx],
                 self._ignore_trivial,
-                1 # revise module to accept parameter k for top-k matrix profile
+                1,  # revise module to accept parameter k for top-k matrix profile
             )
 
             P = np.column_stack((P, PL, PR))

From 695343e4e7ff927b1793de418bc0b2d3dc45b5df Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 22:38:25 -0600
Subject: [PATCH 089/151] Use np.searchsorted to avoid copying arrays into new
 memory

---
 stumpy/stumped.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 9aa815e6e..f6932325b 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -262,21 +262,18 @@ def stumped(
 
     for i in range(1, len(hosts)):
         P, I, PL, IL, PR, IR = results[i]
-        # Update top-k matrix profile, alternative approach:
-        # np.argsort(np.concatenate(profile, P), kind='mergesort')
-        prof = profile.copy()
-        ind = indices.copy()
         for j in range(l):
-            u, w = 0, 0
-            for idx in range(k):
-                if prof[j, u] <= P[j, w]:
-                    profile[j, idx] = prof[j, u]
-                    indices[j, idx] = ind[j, u]
-                    u += 1
-                else:
-                    profile[j, idx] = P[j, w]
-                    indices[j, idx] = I[j, w]
-                    w += 1
+            # Uodate profie[j]
+            for D, ind in zip(P[j], I[j]):
+                if D >= profile[j, -1]:
+                    break  # no need to update profile[j] from this point.
+                idx = np.searchsorted(profile[j], D, side="right")  # might be optimized
+                # with help of checkpoint idx from previous iteration.
+                profile[j, idx + 1 :] = profile[j, idx : k - 1]
+                profile[j, idx] = D
+
+                indices[j, idx + 1 :] = indices[j, idx : k - 1]
+                indices[j, idx] = ind
 
         # Update top-1 left matrix profile and matrix profile index
         cond = PL < profile_L

From f4a37faa71b57cbd6258c981a513ec8a3d2e20b3 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Wed, 18 May 2022 23:42:43 -0600
Subject: [PATCH 090/151] All tests passed


From b6d376319b027706dd771693e63811524605b1be Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 20 May 2022 16:11:29 -0600
Subject: [PATCH 091/151] Replace nested for loops with numpy operations

---
 stumpy/stumped.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index f6932325b..34d665fc7 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -257,23 +257,27 @@ def stumped(
             )
         )
 
+    profile = np.empty((l, 2 * k))
+    indices = np.empty((l, 2 * k))
+
     results = dask_client.gather(futures)
-    profile, indices, profile_L, indices_L, profile_R, indices_R = results[0]
+    (
+        profile[:, :k],
+        indices[:, :k],
+        profile_L,
+        indices_L,
+        profile_R,
+        indices_R,
+    ) = results[0]
 
     for i in range(1, len(hosts)):
         P, I, PL, IL, PR, IR = results[i]
-        for j in range(l):
-            # Uodate profie[j]
-            for D, ind in zip(P[j], I[j]):
-                if D >= profile[j, -1]:
-                    break  # no need to update profile[j] from this point.
-                idx = np.searchsorted(profile[j], D, side="right")  # might be optimized
-                # with help of checkpoint idx from previous iteration.
-                profile[j, idx + 1 :] = profile[j, idx : k - 1]
-                profile[j, idx] = D
-
-                indices[j, idx + 1 :] = indices[j, idx : k - 1]
-                indices[j, idx] = ind
+
+        profile[:, k:] = P
+        indices[:, k:] = I
+        idx = np.argsort(profile, axis=1)
+        profile = np.take_along_axis(profile, idx, axis=1)
+        indices = np.take_along_axis(indices, idx, axis=1)
 
         # Update top-1 left matrix profile and matrix profile index
         cond = PL < profile_L
@@ -286,8 +290,8 @@ def stumped(
         indices_R = np.where(cond, IR, indices_R)
 
     out = np.empty((l, 2 * k + 2), dtype=object)
-    out[:, :k] = profile
-    out[:, k:] = np.column_stack((indices, indices_L, indices_R))
+    out[:, :k] = profile[:, :k]
+    out[:, k:] = np.column_stack((indices[:, :k], indices_L, indices_R))
 
     # Delete data from Dask cluster
     dask_client.cancel(T_A_future)

From cc9c0769fde6e270ff903d69459e241207e57da2 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 20 May 2022 18:28:46 -0600
Subject: [PATCH 092/151] Change the order of some variables in inputs and
 outputs

---
 stumpy/stump.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/stumpy/stump.py b/stumpy/stump.py
index 60d965590..9f37edc8b 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -40,10 +40,10 @@ def _compute_diagonal(
     diags_stop_idx,
     thread_idx,
     ρ,
-    I,
     ρL,
-    IL,
     ρR,
+    I,
+    IL,
     IR,
     ignore_trivial,
     k,
@@ -123,18 +123,18 @@ def _compute_diagonal(
     ρ : numpy.ndarray
         The (top-k) Pearson correlations, sorted in ascending order per row
 
-    I : numpy.ndarray
-        The (top-k) matrix profile indices
-
     ρL : numpy.ndarray
         The top-1 left Pearson correlations
 
-    IL : numpy.ndarray
-        The top-1 left matrix profile indices
-
     ρR : numpy.ndarray
         The top-1 right Pearson correlations
 
+    I : numpy.ndarray
+        The (top-k) matrix profile indices
+
+    IL : numpy.ndarray
+        The top-1 left matrix profile indices
+
     IR : numpy.ndarray
         The top-1 right matrix profile indices
 
@@ -452,10 +452,10 @@ def _stump(
             diags_ranges[thread_idx, 1],
             thread_idx,
             ρ,
-            I,
             ρL,
-            IL,
             ρR,
+            I,
+            IL,
             IR,
             ignore_trivial,
             k,
@@ -508,7 +508,7 @@ def _stump(
     PL = np.sqrt(p_norm_L)
     PR = np.sqrt(p_norm_R)
 
-    return P, I, PL, IL[0, :], PR, IR[0, :]
+    return P, PL, PR, I, IL[0, :], IR[0, :]
 
 
 @core.non_normalized(aamp)
@@ -676,7 +676,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
     else:
         diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
 
-    P, I, PL, IL, PR, IR = _stump(
+    P, PL, PR, I, IL, IR = _stump(
         T_A,
         T_B,
         m,

From a4d456691dacf788739db9dfdf3796ddc568f794 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 20 May 2022 18:47:50 -0600
Subject: [PATCH 093/151] Revise docstrings and comments

---
 stumpy/aamp.py    |  4 ++++
 stumpy/aamped.py  |  4 ++++
 stumpy/stump.py   | 42 +++++++++++++++++++++++-------------------
 stumpy/stumped.py | 19 ++++++++++++-------
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/stumpy/aamp.py b/stumpy/aamp.py
index 87568f365..428c3d4bd 100644
--- a/stumpy/aamp.py
+++ b/stumpy/aamp.py
@@ -268,6 +268,10 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
     p : float, default 2.0
         The p-norm to apply for computing the Minkowski distance.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
+
     Returns
     -------
     out : numpy.ndarray
diff --git a/stumpy/aamped.py b/stumpy/aamped.py
index d833ee8b3..ad147b42f 100644
--- a/stumpy/aamped.py
+++ b/stumpy/aamped.py
@@ -47,6 +47,10 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
     p : float, default 2.0
         The p-norm to apply for computing the Minkowski distance.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
+
     Returns
     -------
     out : numpy.ndarray
diff --git a/stumpy/stump.py b/stumpy/stump.py
index 9f37edc8b..bcf0d4103 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -143,8 +143,8 @@ def _compute_diagonal(
         `False`. Default is `True`.
 
     k : int
-        The number of smallest elements in distance profile that should be stored
-        for constructing the top-k matrix profile.
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
 
     Returns
     -------
@@ -326,28 +326,28 @@ def _stump(
         `False`. Default is `True`.
 
     k : int
-        The number of smallest elements in distance profile that should be stored
-        for constructing the top-k matrix profile.
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
 
     Returns
     -------
     profile : numpy.ndarray
-        Top-k matrix profile
+        The (top-k) matrix profile
 
     indices : numpy.ndarray
-        Top-k matrix profile indices
+        The (top-k) matrix profile indices
 
     left profile : numpy.ndarray
-        Top-1 left matrix profile
+        The (top-1) left matrix profile
 
     left indices : numpy.ndarray
-        Top-1 left matrix profile indices
+        The (top-1) left matrix profile indices
 
     right profile : numpy.ndarray
-        Top-1 right matrix profile
+        The (top-1) right matrix profile
 
     right indices : numpy.ndarray
-        Top-1 right matrix profile indices
+        The (top-1) right matrix profile indices
 
     Notes
     -----
@@ -484,12 +484,11 @@ def _stump(
                 ρR[0, i] = ρR[thread_idx, i]
                 IR[0, i] = IR[thread_idx, i]
 
-    # Convert top-k pearson correlations to distances. The arrays ρ (and so I) should
-    # be reversed since ρ is in ascending order.
+    # Reverse top-k rho (and its associated I) to be in descending order and
+    # then convert from Pearson correlations to Euclidean distances (ascending order)
     p_norm = np.abs(2 * m * (1 - ρ[0, :, ::-1]))
     I = I[0, :, ::-1]
 
-    # Convert top-1 left/right pearson correlations to distances.
     p_norm_L = np.abs(2 * m * (1 - ρL[0, :]))
     p_norm_R = np.abs(2 * m * (1 - ρR[0, :]))
 
@@ -547,16 +546,21 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
         ignored when `normalize == True`.
 
     k : int, default 1
-        The number of smallest elements in distance profile that should be stored
-        for constructing the top-k matrix profile.
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
 
     Returns
     -------
     out : numpy.ndarray
-        The first k columns consist of the top-k matrix profile, the next k columns
-        consist of their corresponding matrix profile indices, the column at
-        numpy indexing 2k contains the top-1 left matrix profile indices and the last
-        column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices.
+        When k = 1 (default), the first column consists of the matrix profile,
+        the second column consists of the matrix profile indices, the third column
+        consists of the left matrix profile indices, and the fourth column consists of
+        the right matrix profile indices. However, when k > 1, the output array will
+        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
+        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
+        of the corresponding top-k matrix profile indices, and the last two columns
+        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
+        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 34d665fc7..99a1ba0b1 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -56,17 +56,22 @@ def stumped(
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
-    k : int
-        The number of smallest elements in distance profile that should be stored
-        for constructing the top-k matrix profile.
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
 
     Returns
     -------
     out : numpy.ndarray
-        The first k columns consist of the top-k matrix profile, the next k columns
-        consist of their corresponding matrix profile indices, the column at
-        numpy indexing 2k contains the top-1 left matrix profile indices and the last
-        column, at numpy indexing 2k+1, contains the top-1 right matrix profile indices.
+        When k = 1 (default), the first column consists of the matrix profile,
+        the second column consists of the matrix profile indices, the third column
+        consists of the left matrix profile indices, and the fourth column consists of
+        the right matrix profile indices. However, when k > 1, the output array will
+        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
+        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
+        of the corresponding top-k matrix profile indices, and the last two columns
+        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
+        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------

From 5ab2978f9c09589e7cbc6279d7c5fb27c07d9723 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Fri, 20 May 2022 22:17:51 -0600
Subject: [PATCH 094/151] Fixed order of outputs returned in _stump

---
 stumpy/scrump.py  | 2 +-
 stumpy/stumped.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/stumpy/scrump.py b/stumpy/scrump.py
index 9fcb51e4b..c547ab02b 100644
--- a/stumpy/scrump.py
+++ b/stumpy/scrump.py
@@ -609,7 +609,7 @@ def update(self):
         if self._chunk_idx < self._n_chunks:
             start_idx, stop_idx = self._chunk_diags_ranges[self._chunk_idx]
 
-            P, I, PL, IL, PR, IR = _stump(
+            P, PL, PR, I, IL, IR = _stump(
                 self._T_A,
                 self._T_B,
                 self._m,
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 99a1ba0b1..1c8b2cd80 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -268,15 +268,15 @@ def stumped(
     results = dask_client.gather(futures)
     (
         profile[:, :k],
-        indices[:, :k],
         profile_L,
-        indices_L,
         profile_R,
+        indices[:, :k],
+        indices_L,
         indices_R,
     ) = results[0]
 
     for i in range(1, len(hosts)):
-        P, I, PL, IL, PR, IR = results[i]
+        P, PL, PR, I, IL, IR = results[i]
 
         profile[:, k:] = P
         indices[:, k:] = I

From 6460a5bc57a4a6ecc3beff8a45f6262cfd47807b Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 21 May 2022 11:23:00 -0600
Subject: [PATCH 095/151] Add new function to update TopK MatrixProfile

---
 stumpy/stumped.py | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 1c8b2cd80..e9784e28f 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -5,6 +5,7 @@
 import logging
 
 import numpy as np
+from numba import njit, prange
 
 from . import core, config
 from .stump import _stump
@@ -13,6 +14,35 @@
 logger = logging.getLogger(__name__)
 
 
+@njit(parallel=True)
+def _merge_topk_profiles_indices(PA, PB, IA, IB):
+    """
+    Merge two top-k matrix profiles while prioritizing values of PA in ties
+    and update PA (and so IA)
+
+    PA : numpy.ndarray
+        a (top-k) matrix profile
+
+    PB : numpy.ndarray
+        a (top-k) matrix profile
+
+    IA : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PA
+
+    IB : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PB
+    """
+    for i in prange(PA.shape[0]):
+        for j in range(PA.shape[1]):
+            if PB[i, j] < PA[i, -1]:
+                idx = np.searchsorted(PA[i], PB[i, j], side="right")
+
+                PA[i, idx + 1 :] = PA[i, idx:-1]
+                PA[i, idx] = PB[i, j]
+                IA[i, idx + 1 :] = IA[i, idx:-1]
+                IA[i, idx] = IB[i, j]
+
+
 @core.non_normalized(aamped)
 def stumped(
     dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
@@ -266,23 +296,12 @@ def stumped(
     indices = np.empty((l, 2 * k))
 
     results = dask_client.gather(futures)
-    (
-        profile[:, :k],
-        profile_L,
-        profile_R,
-        indices[:, :k],
-        indices_L,
-        indices_R,
-    ) = results[0]
+    profile, profile_L, profile_R, indices, indices_L, indices_R = results[0]
 
     for i in range(1, len(hosts)):
         P, PL, PR, I, IL, IR = results[i]
-
-        profile[:, k:] = P
-        indices[:, k:] = I
-        idx = np.argsort(profile, axis=1)
-        profile = np.take_along_axis(profile, idx, axis=1)
-        indices = np.take_along_axis(indices, idx, axis=1)
+        # Update top-k matrix profile and matrix profile indices
+        _merge_topk_profiles_indices(profile, P, indices, I)
 
         # Update top-1 left matrix profile and matrix profile index
         cond = PL < profile_L

From d94db722bb2b3150a38008a323ccd117f4bfc1c2 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 21 May 2022 11:34:38 -0600
Subject: [PATCH 096/151] Add .copy() to update array properly

---
 stumpy/stumped.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index e9784e28f..01606f5bf 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -37,9 +37,9 @@ def _merge_topk_profiles_indices(PA, PB, IA, IB):
             if PB[i, j] < PA[i, -1]:
                 idx = np.searchsorted(PA[i], PB[i, j], side="right")
 
-                PA[i, idx + 1 :] = PA[i, idx:-1]
+                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
                 PA[i, idx] = PB[i, j]
-                IA[i, idx + 1 :] = IA[i, idx:-1]
+                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
                 IA[i, idx] = IB[i, j]
 
 
From 72c3887b014f6be2fe89030013179bfd182bc1c1 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 21 May 2022 12:31:08 -0600
Subject: [PATCH 097/151] Add new test function for TopK MatrixProfile with
 gpu_stump

---
 tests/test_gpu_stump.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 508b02a56..1a2662647 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -350,3 +350,20 @@ def test_gpu_stump_nan_zero_mean_self_join():
     naive.replace_inf(ref_mp)
     naive.replace_inf(comp_mp)
     npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_gpu_stump_self_join_KNN(T_A, T_B):
+    k = 3
+    m = 3
+    zone = int(np.ceil(m / 4))
+    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k)
+    comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k)
+    naive.replace_inf(ref_mp)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)
+
+    comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
+    naive.replace_inf(comp_mp)
+    npt.assert_almost_equal(ref_mp, comp_mp)

From 0068358c8c771d950090d62779a9fd30336f2bfc Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 00:15:00 -0600
Subject: [PATCH 098/151] Enhance gpu_stump to return TopK MatrixProfile

---
 stumpy/gpu_stump.py | 247 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 198 insertions(+), 49 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 667dd8b56..606bf7faf 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -7,7 +7,7 @@
 import os
 
 import numpy as np
-from numba import cuda
+from numba import cuda, njit, prange
 
 from . import core, config
 from .gpu_aamp import gpu_aamp
@@ -15,9 +15,38 @@
 logger = logging.getLogger(__name__)
 
 
+@njit(parallel=True)
+def _merge_topk_profiles_indices(PA, PB, IA, IB):
+    """
+    Merge two top-k matrix profiles while prioritizing values of PA in ties
+    and update PA (and so IA)
+
+    PA : numpy.ndarray
+        a (top-k) matrix profile
+
+    PB : numpy.ndarray
+        a (top-k) matrix profile
+
+    IA : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PA
+
+    IB : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PB
+    """
+    for i in range(PA.shape[0]):
+        for j in range(PA.shape[1]):
+            if PB[i, j] < PA[i, -1]:
+                idx = np.searchsorted(PA[i], PB[i, j], side="right")
+
+                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
+                PA[i, idx] = PB[i, j]
+                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
+                IA[i, idx] = IB[i, j]
+
+
 @cuda.jit(
     "(i8, f8[:], f8[:], i8,  f8[:], f8[:], f8[:], f8[:], f8[:],"
-    "f8[:], f8[:], i8, b1, i8, f8[:, :], i8[:, :], b1)"
+    "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)"
 )
 def _compute_and_update_PI_kernel(
     i,
@@ -31,12 +60,17 @@ def _compute_and_update_PI_kernel(
     Σ_T,
     μ_Q,
     σ_Q,
-    k,
+    profile_len,
     ignore_trivial,
     excl_zone,
     profile,
+    profile_L,
+    profile_R,
     indices,
+    indices_L,
+    indices_R,
     compute_QT,
+    k,
 ):
     """
     A Numba CUDA kernel to update the matrix profile and matrix profile indices
@@ -79,7 +113,7 @@ def _compute_and_update_PI_kernel(
     σ_Q : numpy.ndarray
         Standard deviation of the query sequence, `Q`
 
-    k : int
+    profile_len : int
         The total number of sliding windows to iterate over
 
     ignore_trivial : bool
@@ -91,18 +125,30 @@ def _compute_and_update_PI_kernel(
         sliding window
 
     profile : numpy.ndarray
-        Matrix profile. The first column consists of the global matrix profile,
-        the second column consists of the left matrix profile, and the third
-        column consists of the right matrix profile.
+        The (top-k) matrix profile, sorted in ascending order per row
+
+    profile_L : numpy.ndarray
+        The (top-1) left matrix profile
+
+    profile_R : numpy.ndarray
+        The (top-1) right matrix profile
 
     indices : numpy.ndarray
-        The first column consists of the matrix profile indices, the second
-        column consists of the left matrix profile indices, and the third
-        column consists of the right matrix profile indices.
+        The (top-k) matrix profile indices
+
+    indices_L : numpy.ndarray
+        The (top-1) left matrix profile indices
+
+    indices_R : numpy.ndarray
+        The (top-1) right matrix profile indices
 
     compute_QT : bool
         A boolean flag for whether or not to compute QT
 
+    k : int
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
+
     Returns
     -------
     None
@@ -126,7 +172,7 @@ def _compute_and_update_PI_kernel(
 
     for j in range(start, QT_out.shape[0], stride):
         zone_start = max(0, j - excl_zone)
-        zone_stop = min(k, j + excl_zone)
+        zone_stop = min(profile_len, j + excl_zone)
 
         if compute_QT:
             QT_out[j] = (
@@ -157,16 +203,22 @@ def _compute_and_update_PI_kernel(
         if ignore_trivial:
             if i <= zone_stop and i >= zone_start:
                 p_norm = np.inf
-            if p_norm < profile[j, 1] and i < j:
-                profile[j, 1] = p_norm
-                indices[j, 1] = i
-            if p_norm < profile[j, 2] and i > j:
-                profile[j, 2] = p_norm
-                indices[j, 2] = i
-
-        if p_norm < profile[j, 0]:
-            profile[j, 0] = p_norm
-            indices[j, 0] = i
+            if p_norm < profile_L[j] and i < j:
+                profile_L[j] = p_norm
+                indices_L[j] = i
+            if p_norm < profile_R[j] and i > j:
+                profile_R[j] = p_norm
+                indices_R[j] = i
+            
+        for idx in range(k, -1, -1):
+            if (p_norm < profile[j, idx - 1]) and (idx > 0):
+                profile[j, idx - 1] = profile[j, idx - 2]
+                indices[j, idx - 1] = indices[j, idx - 2]
+            else:
+                break
+        if idx < k:
+            profile[j, idx] = p_norm
+            indices[j, idx] = i
 
 
 def _gpu_stump(
@@ -181,10 +233,11 @@ def _gpu_stump(
     QT_first_fname,
     μ_Q_fname,
     σ_Q_fname,
-    k,
+    profile_len,
     ignore_trivial=True,
     range_start=1,
     device_id=0,
+    k=1,
 ):
     """
     A Numba CUDA version of STOMP for parallel computation of the
@@ -235,7 +288,7 @@ def _gpu_stump(
         The file name for the standard deviation of the query sequence, `Q`,
         relative to the current sliding window
 
-    k : int
+    profile_len : int
         The total number of sliding windows to iterate over
 
     ignore_trivial : bool
@@ -249,6 +302,10 @@ def _gpu_stump(
     device_id : int
         The (GPU) device number to use. The default value is `0`.
 
+    k : int
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
+
     Returns
     -------
     profile_fname : str
@@ -316,11 +373,22 @@ def _gpu_stump(
             device_M_T = cuda.to_device(M_T)
             device_Σ_T = cuda.to_device(Σ_T)
 
-        profile = np.full((k, 3), np.inf, dtype=np.float64)
-        indices = np.full((k, 3), -1, dtype=np.int64)
+        profile = np.full((profile_len, k), np.inf, dtype=np.float64)
+        indices = np.full((profile_len, k), -1, dtype=np.int64)
+
+        profile_L = np.full(profile_len, np.inf, dtype=np.float64)
+        indices_L = np.full(profile_len, -1, dtype=np.int64)
+
+        profile_R = np.full(profile_len, np.inf, dtype=np.float64)
+        indices_R = np.full(profile_len, -1, dtype=np.int64)
 
         device_profile = cuda.to_device(profile)
+        device_profile_L = cuda.to_device(profile_L)
+        device_profile_R = cuda.to_device(profile_R)
         device_indices = cuda.to_device(indices)
+        device_indices_L = cuda.to_device(indices_L)
+        device_indices_R = cuda.to_device(indices_R)
+
         _compute_and_update_PI_kernel[blocks_per_grid, threads_per_block](
             range_start - 1,
             device_T_A,
@@ -333,12 +401,17 @@ def _gpu_stump(
             device_Σ_T,
             device_μ_Q,
             device_σ_Q,
-            k,
+            profile_len,
             ignore_trivial,
             excl_zone,
             device_profile,
+            device_profile_L,
+            device_profile_R,
             device_indices,
+            device_indices_L,
+            device_indices_R,
             False,
+            k,
         )
 
         for i in range(range_start, range_stop):
@@ -354,27 +427,50 @@ def _gpu_stump(
                 device_Σ_T,
                 device_μ_Q,
                 device_σ_Q,
-                k,
+                profile_len,
                 ignore_trivial,
                 excl_zone,
                 device_profile,
+                device_profile_L,
+                device_profile_R,
                 device_indices,
+                device_indices_L,
+                device_indices_R,
                 True,
+                k,
             )
 
         profile = device_profile.copy_to_host()
+        profile_L = device_profile_L.copy_to_host()
+        profile_R = device_profile_R.copy_to_host()
         indices = device_indices.copy_to_host()
+        indices_L = device_indices_L.copy_to_host()
+        indices_R = device_indices_R.copy_to_host()
+
         profile = np.sqrt(profile)
+        profile_L = np.sqrt(profile_L)
+        profile_R = np.sqrt(profile_R)
 
         profile_fname = core.array_to_temp_file(profile)
+        profile_L_fname = core.array_to_temp_file(profile_L)
+        profile_R_fname = core.array_to_temp_file(profile_R)
         indices_fname = core.array_to_temp_file(indices)
+        indices_L_fname = core.array_to_temp_file(indices_L)
+        indices_R_fname = core.array_to_temp_file(indices_R)
 
-    return profile_fname, indices_fname
+    return (
+        profile_fname,
+        profile_L_fname,
+        profile_R_fname,
+        indices_fname,
+        indices_L_fname,
+        indices_R_fname,
+    )
 
 
 @core.non_normalized(gpu_aamp)
 def gpu_stump(
-    T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0
+    T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0, k=1
 ):
     """
     Compute the z-normalized matrix profile with one or more GPU devices
@@ -417,13 +513,22 @@ def gpu_stump(
         The p-norm to apply for computing the Minkowski distance. This parameter is
         ignored when `normalize == True`.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage when k > 1.
+
     Returns
     -------
     out : numpy.ndarray
-        The first column consists of the matrix profile, the second column
-        consists of the matrix profile indices, the third column consists of
-        the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices.
+        When k = 1 (default), the first column consists of the matrix profile,
+        the second column consists of the matrix profile indices, the third column
+        consists of the left matrix profile indices, and the fourth column consists of
+        the right matrix profile indices. However, when k > 1, the output array will
+        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
+        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
+        of the corresponding top-k matrix profile indices, and the last two columns
+        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
+        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------
@@ -505,7 +610,7 @@ def gpu_stump(
         logger.warning("Try setting `ignore_trivial = False`.")
 
     n = T_B.shape[0]
-    k = T_A.shape[0] - m + 1
+    profile_len = T_A.shape[0] - m + 1
     l = n - m + 1
     excl_zone = int(
         np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
@@ -518,8 +623,6 @@ def gpu_stump(
     μ_Q_fname = core.array_to_temp_file(μ_Q)
     σ_Q_fname = core.array_to_temp_file(σ_Q)
 
-    out = np.empty((k, 4), dtype=object)
-
     if isinstance(device_id, int):
         device_ids = [device_id]
     else:
@@ -528,6 +631,12 @@ def gpu_stump(
     profile = [None] * len(device_ids)
     indices = [None] * len(device_ids)
 
+    profile_L = [None] * len(device_ids)
+    indices_L = [None] * len(device_ids)
+
+    profile_R = [None] * len(device_ids)
+    indices_R = [None] * len(device_ids)
+
     for _id in device_ids:
         with cuda.gpus[_id]:
             if (
@@ -571,16 +680,24 @@ def gpu_stump(
                     QT_first_fname,
                     μ_Q_fname,
                     σ_Q_fname,
-                    k,
+                    profile_len,
                     ignore_trivial,
                     start + 1,
                     device_ids[idx],
+                    k,
                 ),
             )
         else:
             # Execute last chunk in parent process
             # Only parent process is executed when a single GPU is requested
-            profile[idx], indices[idx] = _gpu_stump(
+            (
+                profile[idx],
+                profile_L[idx],
+                profile_R[idx],
+                indices[idx],
+                indices_L[idx],
+                indices_R[idx],
+            ) = _gpu_stump(
                 T_A_fname,
                 T_B_fname,
                 m,
@@ -592,10 +709,11 @@ def gpu_stump(
                 QT_first_fname,
                 μ_Q_fname,
                 σ_Q_fname,
-                k,
+                profile_len,
                 ignore_trivial,
                 start + 1,
                 device_ids[idx],
+                k,
             )
 
     # Clean up process pool for multi-GPU request
@@ -606,7 +724,14 @@ def gpu_stump(
         # Collect results from spawned child processes if they exist
         for idx, result in enumerate(results):
             if result is not None:
-                profile[idx], indices[idx] = result.get()
+                (
+                    profile[idx],
+                    profile_L[idx],
+                    profile_R[idx],
+                    indices[idx],
+                    indices_L[idx],
+                    indices_R[idx],
+                ) = result.get()
 
     os.remove(T_A_fname)
     os.remove(T_B_fname)
@@ -621,22 +746,46 @@ def gpu_stump(
 
     for idx in range(len(device_ids)):
         profile_fname = profile[idx]
+        profile_L_fname = profile_L[idx]
+        profile_R_fname = profile_R[idx]
         indices_fname = indices[idx]
+        indices_L_fname = indices_L[idx]
+        indices_R_fname = indices_R[idx]
+
         profile[idx] = np.load(profile_fname, allow_pickle=False)
+        profile_L[idx] = np.load(profile_L_fname, allow_pickle=False)
+        profile_R[idx] = np.load(profile_R_fname, allow_pickle=False)
         indices[idx] = np.load(indices_fname, allow_pickle=False)
+        indices_L[idx] = np.load(indices_L_fname, allow_pickle=False)
+        indices_R[idx] = np.load(indices_R_fname, allow_pickle=False)
+
         os.remove(profile_fname)
+        os.remove(profile_L_fname)
+        os.remove(profile_R_fname)
         os.remove(indices_fname)
+        os.remove(indices_L_fname)
+        os.remove(indices_R_fname)
 
     for i in range(1, len(device_ids)):
-        # Update all matrix profiles and matrix profile indices
-        # (global, left, right) and store in profile[0] and indices[0]
-        for col in range(profile[0].shape[1]):  # pragma: no cover
-            cond = profile[0][:, col] < profile[i][:, col]
-            profile[0][:, col] = np.where(cond, profile[0][:, col], profile[i][:, col])
-            indices[0][:, col] = np.where(cond, indices[0][:, col], indices[i][:, col])
-
-    out[:, 0] = profile[0][:, 0]
-    out[:, 1:4] = indices[0][:, :]
+        # Update (top-k) matrix profile and matrix profile indices
+        _merge_topk_profiles_indices(profile[0], profile[i], indices[0], indices[i])
+
+        # Update (top-1) left matrix profile and matrix profil indices
+        cond = profile_L[0] < profile_L[i]
+        profile_L[0] = np.where(cond, profile_L[0], profile_L[i])
+        indices_L[0] = np.where(cond, indices_L[0], indices_L[i])
+
+        # Update (top-1) right matrix profile and matrix profil indices
+        cond = profile_R[0] < profile_R[i]
+        profile_R[0] = np.where(cond, profile_R[0], profile_R[i])
+        indices_R[0] = np.where(cond, indices_R[0], indices_R[i])
+
+    out = np.empty(
+        (profile_len, 2 * k + 2), dtype=object
+    )  # last two columns are to store
+    # (top-1) left/right matrix profile indices
+    out[:, :k] = profile[0]
+    out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0]))
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover

From 1e7c05e0dce914ca2fc8fbc39cb5411b4fd5fb03 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 14:16:40 -0600
Subject: [PATCH 099/151] Refactored function for merging two top-k matrix
 profiles

---
 stumpy/core.py      | 37 ++++++++++++++++++++++++++++++++++++-
 stumpy/gpu_stump.py | 41 +++++++----------------------------------
 stumpy/stumped.py   | 33 +--------------------------------
 3 files changed, 44 insertions(+), 67 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index a2a30c043..64dee293c 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -7,7 +7,7 @@
 import inspect
 
 import numpy as np
-from numba import njit
+from numba import njit, prange
 from scipy.signal import convolve
 from scipy.ndimage import maximum_filter1d, minimum_filter1d
 from scipy import linalg
@@ -2494,3 +2494,38 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
             MPdist = partition[k]
 
     return MPdist
+
+
+@njit(parallel=True)
+def _merge_topk_profiles_indices(PA, PB, IA, IB):
+    """
+    Merge two top-k matrix profiles PA and PB, and update PA (in place) while
+    prioritizing values of PA in ties. Also, update IA accordingly.
+
+    Parameters
+    ----------
+    PA : numpy.ndarray
+        a (top-k) matrix profile
+
+    PB : numpy.ndarray
+        a (top-k) matrix profile
+
+    IA : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PA
+
+    IB : numpy.ndarray
+        a (top-k) matrix profile indices, corresponding to PB
+
+    Returns
+    -------
+    None
+    """
+    for i in prange(PA.shape[0]):
+        for j in range(PA.shape[1]):
+            if PB[i, j] < PA[i, -1]:
+                idx = np.searchsorted(PA[i], PB[i, j], side="right")
+
+                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
+                PA[i, idx] = PB[i, j]
+                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
+                IA[i, idx] = IB[i, j]
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 606bf7faf..2df5b14b1 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -7,7 +7,7 @@
 import os
 
 import numpy as np
-from numba import cuda, njit, prange
+from numba import cuda
 
 from . import core, config
 from .gpu_aamp import gpu_aamp
@@ -15,35 +15,6 @@
 logger = logging.getLogger(__name__)
 
 
-@njit(parallel=True)
-def _merge_topk_profiles_indices(PA, PB, IA, IB):
-    """
-    Merge two top-k matrix profiles while prioritizing values of PA in ties
-    and update PA (and so IA)
-
-    PA : numpy.ndarray
-        a (top-k) matrix profile
-
-    PB : numpy.ndarray
-        a (top-k) matrix profile
-
-    IA : numpy.ndarray
-        a (top-k) matrix profile indices, corresponding to PA
-
-    IB : numpy.ndarray
-        a (top-k) matrix profile indices, corresponding to PB
-    """
-    for i in range(PA.shape[0]):
-        for j in range(PA.shape[1]):
-            if PB[i, j] < PA[i, -1]:
-                idx = np.searchsorted(PA[i], PB[i, j], side="right")
-
-                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
-                PA[i, idx] = PB[i, j]
-                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
-                IA[i, idx] = IB[i, j]
-
-
 @cuda.jit(
     "(i8, f8[:], f8[:], i8,  f8[:], f8[:], f8[:], f8[:], f8[:],"
     "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)"
@@ -209,7 +180,7 @@ def _compute_and_update_PI_kernel(
             if p_norm < profile_R[j] and i > j:
                 profile_R[j] = p_norm
                 indices_R[j] = i
-            
+
         for idx in range(k, -1, -1):
             if (p_norm < profile[j, idx - 1]) and (idx > 0):
                 profile[j, idx - 1] = profile[j, idx - 2]
@@ -766,9 +737,11 @@ def gpu_stump(
         os.remove(indices_L_fname)
         os.remove(indices_R_fname)
 
+    profile_0 = profile[0].copy()
+    indices_0 = indices[0].copy()
     for i in range(1, len(device_ids)):
         # Update (top-k) matrix profile and matrix profile indices
-        _merge_topk_profiles_indices(profile[0], profile[i], indices[0], indices[i])
+        core._merge_topk_profiles_indices(profile_0, profile[i], indices_0, indices[i])
 
         # Update (top-1) left matrix profile and matrix profil indices
         cond = profile_L[0] < profile_L[i]
@@ -784,8 +757,8 @@ def gpu_stump(
         (profile_len, 2 * k + 2), dtype=object
     )  # last two columns are to store
     # (top-1) left/right matrix profile indices
-    out[:, :k] = profile[0]
-    out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0]))
+    out[:, :k] = profile_0
+    out[:, k:] = np.column_stack((indices_0, indices_L[0], indices_R[0]))
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 01606f5bf..0667713d3 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -5,7 +5,6 @@
 import logging
 
 import numpy as np
-from numba import njit, prange
 
 from . import core, config
 from .stump import _stump
@@ -13,36 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-
-@njit(parallel=True)
-def _merge_topk_profiles_indices(PA, PB, IA, IB):
-    """
-    Merge two top-k matrix profiles while prioritizing values of PA in ties
-    and update PA (and so IA)
-
-    PA : numpy.ndarray
-        a (top-k) matrix profile
-
-    PB : numpy.ndarray
-        a (top-k) matrix profile
-
-    IA : numpy.ndarray
-        a (top-k) matrix profile indices, corresponding to PA
-
-    IB : numpy.ndarray
-        a (top-k) matrix profile indices, corresponding to PB
-    """
-    for i in prange(PA.shape[0]):
-        for j in range(PA.shape[1]):
-            if PB[i, j] < PA[i, -1]:
-                idx = np.searchsorted(PA[i], PB[i, j], side="right")
-
-                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
-                PA[i, idx] = PB[i, j]
-                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
-                IA[i, idx] = IB[i, j]
-
-
 @core.non_normalized(aamped)
 def stumped(
     dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
@@ -301,7 +270,7 @@ def stumped(
     for i in range(1, len(hosts)):
         P, PL, PR, I, IL, IR = results[i]
         # Update top-k matrix profile and matrix profile indices
-        _merge_topk_profiles_indices(profile, P, indices, I)
+        core._merge_topk_profiles_indices(profile, P, indices, I)
 
         # Update top-1 left matrix profile and matrix profile index
         cond = PL < profile_L

From 2ebc276498eab50fb08c3f1f2ecf30db337eb80e Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 14:21:33 -0600
Subject: [PATCH 100/151] Clean up code

---
 stumpy/gpu_stump.py | 10 +++++-----
 stumpy/stumped.py   |  5 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 2df5b14b1..803b020f0 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -737,11 +737,11 @@ def gpu_stump(
         os.remove(indices_L_fname)
         os.remove(indices_R_fname)
 
-    profile_0 = profile[0].copy()
-    indices_0 = indices[0].copy()
     for i in range(1, len(device_ids)):
         # Update (top-k) matrix profile and matrix profile indices
-        core._merge_topk_profiles_indices(profile_0, profile[i], indices_0, indices[i])
+        core._merge_topk_profiles_indices(
+            profile[0], profile[i], indices[0], indices[i]
+        )
 
         # Update (top-1) left matrix profile and matrix profil indices
         cond = profile_L[0] < profile_L[i]
@@ -757,8 +757,8 @@ def gpu_stump(
         (profile_len, 2 * k + 2), dtype=object
     )  # last two columns are to store
     # (top-1) left/right matrix profile indices
-    out[:, :k] = profile_0
-    out[:, k:] = np.column_stack((indices_0, indices_L[0], indices_R[0]))
+    out[:, :k] = profile[0]
+    out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0]))
 
     threshold = 10e-6
     if core.are_distances_too_small(out[:, 0], threshold=threshold):  # pragma: no cover
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 0667713d3..17e0d556c 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -12,6 +12,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 @core.non_normalized(aamped)
 def stumped(
     dask_client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1
@@ -283,8 +284,8 @@ def stumped(
         indices_R = np.where(cond, IR, indices_R)
 
     out = np.empty((l, 2 * k + 2), dtype=object)
-    out[:, :k] = profile[:, :k]
-    out[:, k:] = np.column_stack((indices[:, :k], indices_L, indices_R))
+    out[:, :k] = profile
+    out[:, k:] = np.column_stack((indices, indices_L, indices_R))
 
     # Delete data from Dask cluster
     dask_client.cancel(T_A_future)

From 1170f2ebd770ed4f70aa3048dd4e6778bb723c53 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 15:44:18 -0600
Subject: [PATCH 101/151] Add naive version of _merge_topk_profiles_indices
 function

---
 tests/test_core.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 6ef78d230..c26dd449d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -82,6 +82,15 @@ def naive_bsf_indices(n):
     return np.array(out)
 
 
+def naive_merge_topk_profiles_indices(PA, PB, IA, IB):
+    profile = np.column_stack((PA, PB))
+    indices = np.column_stack((IA, IB))
+
+    idx = np.argsort(profile, axis=1)
+    PA[:, :] = np.take_along_axis(profile, idx, axis=1)[:, : PA.shape[1]]
+    IA[:, :] = np.take_along_axis(indices, idx, axis=1)[:, : PA.shape[1]]
+
+
 test_data = [
     (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)),
     (

From 2a827b450df582b95d68a0578ca82ced758fe7f1 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 15:48:54 -0600
Subject: [PATCH 102/151] Rename function

---
 stumpy/core.py      | 2 +-
 stumpy/gpu_stump.py | 2 +-
 stumpy/stumped.py   | 2 +-
 tests/test_core.py  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 64dee293c..89b6266fc 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2497,7 +2497,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
 
 
 @njit(parallel=True)
-def _merge_topk_profiles_indices(PA, PB, IA, IB):
+def _merge_topk_PI(PA, PB, IA, IB):
     """
     Merge two top-k matrix profiles PA and PB, and update PA (in place) while
     prioritizing values of PA in ties. Also, update IA accordingly.
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 803b020f0..cc4537813 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -739,7 +739,7 @@ def gpu_stump(
 
     for i in range(1, len(device_ids)):
         # Update (top-k) matrix profile and matrix profile indices
-        core._merge_topk_profiles_indices(
+        core._merge_topk_PI(
             profile[0], profile[i], indices[0], indices[i]
         )
 
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 17e0d556c..0f6459db5 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -271,7 +271,7 @@ def stumped(
     for i in range(1, len(hosts)):
         P, PL, PR, I, IL, IR = results[i]
         # Update top-k matrix profile and matrix profile indices
-        core._merge_topk_profiles_indices(profile, P, indices, I)
+        core._merge_topk_PI(profile, P, indices, I)
 
         # Update top-1 left matrix profile and matrix profile index
         cond = PL < profile_L
diff --git a/tests/test_core.py b/tests/test_core.py
index c26dd449d..95dc268d3 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -82,7 +82,7 @@ def naive_bsf_indices(n):
     return np.array(out)
 
 
-def naive_merge_topk_profiles_indices(PA, PB, IA, IB):
+def naive_merge_topk_PI(PA, PB, IA, IB):
     profile = np.column_stack((PA, PB))
     indices = np.column_stack((IA, IB))
 

From cc62c74f11f229dcc7bd98aabba2759cda91260f Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 16:13:24 -0600
Subject: [PATCH 103/151] Revise naive function to make it more readable

---
 tests/test_core.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 95dc268d3..4585de1af 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -87,9 +87,11 @@ def naive_merge_topk_PI(PA, PB, IA, IB):
     indices = np.column_stack((IA, IB))
 
     idx = np.argsort(profile, axis=1)
-    PA[:, :] = np.take_along_axis(profile, idx, axis=1)[:, : PA.shape[1]]
-    IA[:, :] = np.take_along_axis(indices, idx, axis=1)[:, : PA.shape[1]]
+    profile = np.take_along_axis(profile, idx, axis=1)
+    indices = np.take_along_axis(indices, idx, axis=1)
 
+    PA[:, :] = profile[:, : PA.shape[1]]
+    IA[:, :] = indices[:, : PA.shape[1]]
 
 test_data = [
     (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)),

From b6b74c4edaa2cb2bf5c3a45987b631cf1a76ab9e Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 17:32:44 -0600
Subject: [PATCH 104/151] Add test function for merge_topk_PI

---
 tests/test_core.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 4585de1af..8e29c2f1a 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1039,3 +1039,51 @@ def test_select_P_ABBA_val_inf():
     p_abba.sort()
     ref = p_abba[k - 1]
     npt.assert_almost_equal(ref, comp)
+
+
+def test_merge_topk_PI():
+    PA = np.array([
+    [0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.1, 0.1, 0.2, 0.3, 0.4],
+    [0.1, 0.2, np.inf, np.inf, np.inf],
+    [np.inf, np.inf, np.inf, np.inf, np.inf]
+    ])
+
+    PB = np.array([
+    [0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.15, 0.25, 0.35, 0.45],
+    [0.15, 0.25, 0.35, 0.45, 0.55],
+    [0.01, 0.02, 0.03, 0.04, 0.05],
+    [0.6, 0.7, 0.8, 0.9, 1],
+    [0.1, 0.1, 0.2, 0.3, 0.4],
+    [0.1, 0.2, 0.3, 0.4, 0.5],
+    [0.0, 0.3, np.inf, np.inf, np.inf],
+    [np.inf, np.inf, np.inf, np.inf, np.inf],
+    ])
+
+    n, k = PA.shape
+
+    IA = np.arange(n * k).reshape(n, k)
+    IB = IA.copy() + n * k
+    IA[7, 2:] = -1
+    IA[8, :] = -1
+    IB[7, 2:] = -1
+    IB[8, :] = -1
+
+    ref_P = PA.copy()
+    ref_I = IA.copy()
+
+    comp_P = PA.copy()
+    comp_I = IA.copy()
+
+    naive_merge_topk_PI(ref_P, PB, ref_I, IB)
+    core._merge_topk_PI(comp_P, PB, comp_I, IB)
+
+    ref = np.column_stack((ref_P, ref_I))
+    comp = np.column_stack((comp_P, comp_I))
+    npt.assert_array_equal(ref, comp)

From b6d6450850453bfde5c932d129e79e063435b9f8 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 17:37:51 -0600
Subject: [PATCH 105/151] Moved naive function to naive.py

---
 tests/naive.py     | 12 ++++++++++++
 tests/test_core.py | 13 +------------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 4a5ed789a..3074c2359 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -1760,3 +1760,15 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
         )
 
     return total_ndists
+
+
+def merge_topk_PI(PA, PB, IA, IB):
+    profile = np.column_stack((PA, PB))
+    indices = np.column_stack((IA, IB))
+
+    idx = np.argsort(profile, axis=1)
+    profile = np.take_along_axis(profile, idx, axis=1)
+    indices = np.take_along_axis(indices, idx, axis=1)
+
+    PA[:, :] = profile[:, : PA.shape[1]]
+    IA[:, :] = indices[:, : PA.shape[1]]
diff --git a/tests/test_core.py b/tests/test_core.py
index 8e29c2f1a..e45f8c600 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -82,17 +82,6 @@ def naive_bsf_indices(n):
     return np.array(out)
 
 
-def naive_merge_topk_PI(PA, PB, IA, IB):
-    profile = np.column_stack((PA, PB))
-    indices = np.column_stack((IA, IB))
-
-    idx = np.argsort(profile, axis=1)
-    profile = np.take_along_axis(profile, idx, axis=1)
-    indices = np.take_along_axis(indices, idx, axis=1)
-
-    PA[:, :] = profile[:, : PA.shape[1]]
-    IA[:, :] = indices[:, : PA.shape[1]]
-
 test_data = [
     (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)),
     (
@@ -1081,7 +1070,7 @@ def test_merge_topk_PI():
     comp_P = PA.copy()
     comp_I = IA.copy()
 
-    naive_merge_topk_PI(ref_P, PB, ref_I, IB)
+    naive.merge_topk_PI(ref_P, PB, ref_I, IB)
     core._merge_topk_PI(comp_P, PB, comp_I, IB)
 
     ref = np.column_stack((ref_P, ref_I))

From 97a04f457ca7c7542b768e504652bf2a9b0d7abf Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 17:46:32 -0600
Subject: [PATCH 106/151] Correct Format

---
 stumpy/gpu_stump.py |  4 +---
 tests/test_core.py  | 50 ++++++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index cc4537813..26e49cbb2 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -739,9 +739,7 @@ def gpu_stump(
 
     for i in range(1, len(device_ids)):
         # Update (top-k) matrix profile and matrix profile indices
-        core._merge_topk_PI(
-            profile[0], profile[i], indices[0], indices[i]
-        )
+        core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i])
 
         # Update (top-1) left matrix profile and matrix profil indices
         cond = profile_L[0] < profile_L[i]
diff --git a/tests/test_core.py b/tests/test_core.py
index e45f8c600..707893d14 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1031,29 +1031,33 @@ def test_select_P_ABBA_val_inf():
 
 
 def test_merge_topk_PI():
-    PA = np.array([
-    [0.0, 0.0, 0.0, 0.0, 0.0],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.1, 0.1, 0.2, 0.3, 0.4],
-    [0.1, 0.2, np.inf, np.inf, np.inf],
-    [np.inf, np.inf, np.inf, np.inf, np.inf]
-    ])
-
-    PB = np.array([
-    [0.0, 0.0, 0.0, 0.0, 0.0],
-    [0.0, 0.15, 0.25, 0.35, 0.45],
-    [0.15, 0.25, 0.35, 0.45, 0.55],
-    [0.01, 0.02, 0.03, 0.04, 0.05],
-    [0.6, 0.7, 0.8, 0.9, 1],
-    [0.1, 0.1, 0.2, 0.3, 0.4],
-    [0.1, 0.2, 0.3, 0.4, 0.5],
-    [0.0, 0.3, np.inf, np.inf, np.inf],
-    [np.inf, np.inf, np.inf, np.inf, np.inf],
-    ])
+    PA = np.array(
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.1, 0.1, 0.2, 0.3, 0.4],
+            [0.1, 0.2, np.inf, np.inf, np.inf],
+            [np.inf, np.inf, np.inf, np.inf, np.inf],
+        ]
+    )
+
+    PB = np.array(
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [0.0, 0.15, 0.25, 0.35, 0.45],
+            [0.15, 0.25, 0.35, 0.45, 0.55],
+            [0.01, 0.02, 0.03, 0.04, 0.05],
+            [0.6, 0.7, 0.8, 0.9, 1],
+            [0.1, 0.1, 0.2, 0.3, 0.4],
+            [0.1, 0.2, 0.3, 0.4, 0.5],
+            [0.0, 0.3, np.inf, np.inf, np.inf],
+            [np.inf, np.inf, np.inf, np.inf, np.inf],
+        ]
+    )
 
     n, k = PA.shape
 

From 50f4ee8cf84b6f5958b9691d23d961a26d5f06b5 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 17:58:52 -0600
Subject: [PATCH 107/151] Correct Style

---
 stumpy/aamp.py      |  3 ++-
 stumpy/aamped.py    |  3 ++-
 stumpy/gpu_stump.py | 24 ++++++++++++++----------
 stumpy/stump.py     | 24 ++++++++++++++----------
 stumpy/stumped.py   | 18 ++++++++++--------
 5 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/stumpy/aamp.py b/stumpy/aamp.py
index 428c3d4bd..82eb41639 100644
--- a/stumpy/aamp.py
+++ b/stumpy/aamp.py
@@ -270,7 +270,8 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
 
     k : int, default 1
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
diff --git a/stumpy/aamped.py b/stumpy/aamped.py
index ad147b42f..4499c58b5 100644
--- a/stumpy/aamped.py
+++ b/stumpy/aamped.py
@@ -49,7 +49,8 @@ def aamped(dask_client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
 
     k : int, default 1
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 26e49cbb2..15583c58e 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -118,7 +118,8 @@ def _compute_and_update_PI_kernel(
 
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
@@ -275,7 +276,8 @@ def _gpu_stump(
 
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
@@ -486,20 +488,22 @@ def gpu_stump(
 
     k : int, default 1
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
     out : numpy.ndarray
         When k = 1 (default), the first column consists of the matrix profile,
         the second column consists of the matrix profile indices, the third column
-        consists of the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices. However, when k > 1, the output array will
-        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
-        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
-        of the corresponding top-k matrix profile indices, and the last two columns
-        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
-        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
+        consists of the left matrix profile indices, and the fourth column consists
+        of the right matrix profile indices. However, when k > 1, the output array
+        will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k])
+        consists of the top-k matrix profile, the next set of k columns
+        (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile
+        indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or,
+        equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left
+        matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------
diff --git a/stumpy/stump.py b/stumpy/stump.py
index bcf0d4103..f5a5fe811 100644
--- a/stumpy/stump.py
+++ b/stumpy/stump.py
@@ -144,7 +144,8 @@ def _compute_diagonal(
 
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
@@ -327,7 +328,8 @@ def _stump(
 
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
@@ -547,20 +549,22 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
 
     k : int, default 1
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
     out : numpy.ndarray
         When k = 1 (default), the first column consists of the matrix profile,
         the second column consists of the matrix profile indices, the third column
-        consists of the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices. However, when k > 1, the output array will
-        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
-        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
-        of the corresponding top-k matrix profile indices, and the last two columns
-        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
-        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
+        consists of the left matrix profile indices, and the fourth column consists
+        of the right matrix profile indices. However, when k > 1, the output array
+        will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k])
+        consists of the top-k matrix profile, the next set of k columns
+        (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile
+        indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or,
+        equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left
+        matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------
diff --git a/stumpy/stumped.py b/stumpy/stumped.py
index 0f6459db5..f98338ce9 100644
--- a/stumpy/stumped.py
+++ b/stumpy/stumped.py
@@ -58,20 +58,22 @@ def stumped(
 
     k : int, default 1
         The number of top `k` smallest distances used to construct the matrix profile.
-        Note that this will increase the total computational time and memory usage when k > 1.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
 
     Returns
     -------
     out : numpy.ndarray
         When k = 1 (default), the first column consists of the matrix profile,
         the second column consists of the matrix profile indices, the third column
-        consists of the left matrix profile indices, and the fourth column consists of
-        the right matrix profile indices. However, when k > 1, the output array will
-        contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k]) consists
-        of the top-k matrix profile, the next set of k columns (i.e., out[:, k:2k]) consists
-        of the corresponding top-k matrix profile indices, and the last two columns
-        (i.e., out[:, 2k] and out[:, 2k+1] or, equivalently, out[:, -2] and out[:, -1]) correspond to
-        the top-1 left matrix profile indices and the top-1 right matrix profile indices, respectively.
+        consists of the left matrix profile indices, and the fourth column consists
+        of the right matrix profile indices. However, when k > 1, the output array
+        will contain exactly 2 * k + 2 columns. The first k columns (i.e., out[:, :k])
+        consists of the top-k matrix profile, the next set of k columns
+        (i.e., out[:, k:2k]) consists of the corresponding top-k matrix profile
+        indices, and the last two columns (i.e., out[:, 2k] and out[:, 2k+1] or,
+        equivalently, out[:, -2] and out[:, -1]) correspond to the top-1 left
+        matrix profile indices and the top-1 right matrix profile indices, respectively.
 
     See Also
     --------

From 5b7da52bf1a936a147d47321e06653a67da1db29 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 18:02:04 -0600
Subject: [PATCH 108/151] Add parameter k to avoid failure in non-normalized
 decorator unit test

---
 stumpy/gpu_aamp.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py
index e62be7b02..0c9a21a85 100644
--- a/stumpy/gpu_aamp.py
+++ b/stumpy/gpu_aamp.py
@@ -339,7 +339,9 @@ def _gpu_aamp(
     return profile_fname, indices_fname
 
 
-def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0):
+def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1):
+    # function needs to be revised to return (top-k) matrix profile and
+    # matrix profile indices
     """
     Compute the non-normalized (i.e., without z-normalization) matrix profile with one
     or more GPU devices
@@ -375,6 +377,11 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0):
     p : float, default 2.0
         The p-norm to apply for computing the Minkowski distance.
 
+    k : int, default 1
+        The number of top `k` smallest distances used to construct the matrix profile.
+        Note that this will increase the total computational time and memory usage
+        when k > 1.
+
     Returns
     -------
     out : numpy.ndarray

From e983ef0997ac2e4bcf1c14387be5ec617ec66a4d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 20:34:58 -0600
Subject: [PATCH 109/151] Skip a for-loop in unit test coverage

---
 stumpy/gpu_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 15583c58e..0d76e19b6 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -741,7 +741,7 @@ def gpu_stump(
         os.remove(indices_L_fname)
         os.remove(indices_R_fname)
 
-    for i in range(1, len(device_ids)):
+    for i in range(1, len(device_ids)):  # pragma: no cover
         # Update (top-k) matrix profile and matrix profile indices
         core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i])
 

From b0c5cace4951f97b201f7b42ca0d9627c22bf890 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 20:37:07 -0600
Subject: [PATCH 110/151] All tests pass


From 787e3f761162475e556c7fb4bbc252796fa2f9a6 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 21:20:50 -0600
Subject: [PATCH 111/151] Use randomly generated arrays for test function

---
 tests/test_core.py | 77 ++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 707893d14..21e08fd76 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1031,42 +1031,53 @@ def test_select_P_ABBA_val_inf():
 
 
 def test_merge_topk_PI():
-    PA = np.array(
-        [
-            [0.0, 0.0, 0.0, 0.0, 0.0],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.1, 0.1, 0.2, 0.3, 0.4],
-            [0.1, 0.2, np.inf, np.inf, np.inf],
-            [np.inf, np.inf, np.inf, np.inf, np.inf],
-        ]
-    )
-
-    PB = np.array(
-        [
-            [0.0, 0.0, 0.0, 0.0, 0.0],
-            [0.0, 0.15, 0.25, 0.35, 0.45],
-            [0.15, 0.25, 0.35, 0.45, 0.55],
-            [0.01, 0.02, 0.03, 0.04, 0.05],
-            [0.6, 0.7, 0.8, 0.9, 1],
-            [0.1, 0.1, 0.2, 0.3, 0.4],
-            [0.1, 0.2, 0.3, 0.4, 0.5],
-            [0.0, 0.3, np.inf, np.inf, np.inf],
-            [np.inf, np.inf, np.inf, np.inf, np.inf],
-        ]
-    )
-
-    n, k = PA.shape
+    n=50
+    k=5
+
+    PA = np.random.randint(0, 5, size=(n, k))
+    PA = np.sort(PA)
+
+    PB = np.random.randint(0, 5, size=(n, k))
+    PB = np.sort(PB)
+
+    #PA = np.array(
+    #    [
+    #        [0.0, 0.0, 0.0, 0.0, 0.0],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.1, 0.1, 0.2, 0.3, 0.4],
+    #        [0.1, 0.2, np.inf, np.inf, np.inf],
+    #        [np.inf, np.inf, np.inf, np.inf, np.inf],
+    #    ]
+    #)
+
+    #PB = np.array(
+    #    [
+    #        [0.0, 0.0, 0.0, 0.0, 0.0],
+    #        [0.0, 0.15, 0.25, 0.35, 0.45],
+    #        [0.15, 0.25, 0.35, 0.45, 0.55],
+    #        [0.01, 0.02, 0.03, 0.04, 0.05],
+    #        [0.6, 0.7, 0.8, 0.9, 1],
+    #        [0.1, 0.1, 0.2, 0.3, 0.4],
+    #        [0.1, 0.2, 0.3, 0.4, 0.5],
+    #        [0.0, 0.3, np.inf, np.inf, np.inf],
+    #        [np.inf, np.inf, np.inf, np.inf, np.inf],
+    #    ]
+    #)
 
     IA = np.arange(n * k).reshape(n, k)
     IB = IA.copy() + n * k
-    IA[7, 2:] = -1
-    IA[8, :] = -1
-    IB[7, 2:] = -1
-    IB[8, :] = -1
+
+    #n, k = PA.shape
+    #IA = np.arange(n * k).reshape(n, k)
+    #IB = IA.copy() + n * k
+    #IA[7, 2:] = -1
+    #IA[8, :] = -1
+    #IB[7, 2:] = -1
+    #IB[8, :] = -1
 
     ref_P = PA.copy()
     ref_I = IA.copy()

From 2ff2b85d7a4ec6f3bbea3dd21c05681a15e62dc7 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 23:01:32 -0600
Subject: [PATCH 112/151] Add minor comment

---
 stumpy/core.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/stumpy/core.py b/stumpy/core.py
index 89b6266fc..7528d5f85 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2525,6 +2525,8 @@ def _merge_topk_PI(PA, PB, IA, IB):
             if PB[i, j] < PA[i, -1]:
                 idx = np.searchsorted(PA[i], PB[i, j], side="right")
 
+                # .copy() operation is needed to resolve wrong result that is
+                # caused by "prange"
                 PA[i, idx + 1 :] = PA[i, idx:-1].copy()
                 PA[i, idx] = PB[i, j]
                 IA[i, idx + 1 :] = IA[i, idx:-1].copy()

From c3060278426d583fa4a35c41b0c8758f8aa857a8 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sun, 22 May 2022 23:04:40 -0600
Subject: [PATCH 113/151] Erase unnecessary comments

---
 tests/test_core.py | 40 ++--------------------------------------
 1 file changed, 2 insertions(+), 38 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 21e08fd76..a1efbf681 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1031,8 +1031,8 @@ def test_select_P_ABBA_val_inf():
 
 
 def test_merge_topk_PI():
-    n=50
-    k=5
+    n = 50
+    k = 5
 
     PA = np.random.randint(0, 5, size=(n, k))
     PA = np.sort(PA)
@@ -1040,45 +1040,9 @@ def test_merge_topk_PI():
     PB = np.random.randint(0, 5, size=(n, k))
     PB = np.sort(PB)
 
-    #PA = np.array(
-    #    [
-    #        [0.0, 0.0, 0.0, 0.0, 0.0],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.1, 0.1, 0.2, 0.3, 0.4],
-    #        [0.1, 0.2, np.inf, np.inf, np.inf],
-    #        [np.inf, np.inf, np.inf, np.inf, np.inf],
-    #    ]
-    #)
-
-    #PB = np.array(
-    #    [
-    #        [0.0, 0.0, 0.0, 0.0, 0.0],
-    #        [0.0, 0.15, 0.25, 0.35, 0.45],
-    #        [0.15, 0.25, 0.35, 0.45, 0.55],
-    #        [0.01, 0.02, 0.03, 0.04, 0.05],
-    #        [0.6, 0.7, 0.8, 0.9, 1],
-    #        [0.1, 0.1, 0.2, 0.3, 0.4],
-    #        [0.1, 0.2, 0.3, 0.4, 0.5],
-    #        [0.0, 0.3, np.inf, np.inf, np.inf],
-    #        [np.inf, np.inf, np.inf, np.inf, np.inf],
-    #    ]
-    #)
-
     IA = np.arange(n * k).reshape(n, k)
     IB = IA.copy() + n * k
 
-    #n, k = PA.shape
-    #IA = np.arange(n * k).reshape(n, k)
-    #IB = IA.copy() + n * k
-    #IA[7, 2:] = -1
-    #IA[8, :] = -1
-    #IB[7, 2:] = -1
-    #IB[8, :] = -1
-
     ref_P = PA.copy()
     ref_I = IA.copy()
 

From 898e9f366d3d4a0cebc1bfdddd20c722a2594f26 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 23 May 2022 11:56:42 -0600
Subject: [PATCH 114/151] Remove unnecessary copy operation

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index a1efbf681..3fa1447bd 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1041,7 +1041,7 @@ def test_merge_topk_PI():
     PB = np.sort(PB)
 
     IA = np.arange(n * k).reshape(n, k)
-    IB = IA.copy() + n * k
+    IB = IA + n * k
 
     ref_P = PA.copy()
     ref_I = IA.copy()

From 3541faec462fc0869af9bcb3b6eafc93469ebc21 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 24 May 2022 11:49:05 -0600
Subject: [PATCH 115/151] Major revision in function _merge_topk_PI

- use PB to get the number of iterations for the two outermost for-loops
- improve Docstring
- use start and stop to narrow down the search space
- use for-loop instead of .copy() operation.
---
 stumpy/core.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index bce98964d..0cc858f93 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2505,10 +2505,12 @@ def _merge_topk_PI(PA, PB, IA, IB):
     Parameters
     ----------
     PA : numpy.ndarray
-        a (top-k) matrix profile
+        a (top-k) matrix profile, with ndim of 2, where values in each row are
+        sorted in ascending order. Also, it needs to be the same shape as PB.
 
     PB : numpy.ndarray
-        a (top-k) matrix profile
+        a (top-k) matrix profile, with ndim of 2, where values in each row are
+        sorted in ascending order. Also, it needs to be the same shape as PA.
 
     IA : numpy.ndarray
         a (top-k) matrix profile indices, corresponding to PA
@@ -2520,14 +2522,20 @@ def _merge_topk_PI(PA, PB, IA, IB):
     -------
     None
     """
-    for i in prange(PA.shape[0]):
-        for j in range(PA.shape[1]):
+    for i in prange(PB.shape[0]):
+        start = 0
+        stop = np.searchsorted(PA[i], PB[i, -1], side="right")
+
+        for j in range(PB.shape[1]):
             if PB[i, j] < PA[i, -1]:
-                idx = np.searchsorted(PA[i], PB[i, j], side="right")
+                idx = np.searchsorted(PA[i, start:stop], PB[i, j], side="right") + start
+
+                for g in range(PB.shape[1] - 1, idx, -1):
+                    PA[i, g] = PA[i, g - 1]
+                    IA[i, g] = IA[i, g - 1]
 
-                # .copy() operation is needed to resolve wrong result that is
-                # caused by "prange"
-                PA[i, idx + 1 :] = PA[i, idx:-1].copy()
                 PA[i, idx] = PB[i, j]
-                IA[i, idx + 1 :] = IA[i, idx:-1].copy()
                 IA[i, idx] = IB[i, j]
+
+                start = idx
+                stop += 1  # because of shifting elements to the right by one

From ce8cd4c599b8763519b483a4c9c3f695dc445350 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 00:54:00 -0600
Subject: [PATCH 116/151] Add device function to find insertion index into
 sorted array

---
 stumpy/core.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/stumpy/core.py b/stumpy/core.py
index 535471761..200980648 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2604,3 +2604,48 @@ def _merge_topk_PI(PA, PB, IA, IB):
 
                 start = idx
                 stop += 1  # because of shifting elements to the right by one
+
+
+@cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True)
+def _gpu_searchsorted_right(a, v, bfs, nlevel):
+    """
+    a device function in replace of numpy.searchsorted(a, v, side='right')
+
+    Parameters
+    ----------
+    a : numpy.ndarray
+        1-dim array sorted in ascending order.
+
+    v : float
+        value to insert into array `a`
+
+    bfs : numpy.ndarray
+        the level order indices from the implicit construction of a binary
+        search tree followed by a breadth first (level order) search.
+
+    nlevel : int
+        the number of levels in the binary search tree based from which the array
+        `bfs` is obtained.
+
+    Returns
+    -------
+    idx : int
+        the index of the insertion point
+    """
+    n = a.shape[0]
+    idx = 0
+    for level in range(nlevel):
+        if v < a[bfs[idx]]:
+            next_idx = 2 * idx + 1
+        else:
+            next_idx = 2 * idx + 2
+
+        if level == nlevel-1 or bfs[next_idx]<0:
+            if v < a[bfs[idx]]:
+                idx = max(bfs[idx], 0)
+            else:
+                idx = min(bfs[idx] + 1, n)
+            break
+        idx = next_idx
+
+    return idx

From 09bbe7fb689e47330aacac6737e56d5d0d416356 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 01:11:01 -0600
Subject: [PATCH 117/151] Add test function for gpu_searchsorted

---
 tests/test_core.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 4437149d8..7423718ab 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1086,3 +1086,37 @@ def test_merge_topk_PI():
     ref = np.column_stack((ref_P, ref_I))
     comp = np.column_stack((comp_P, comp_I))
     npt.assert_array_equal(ref, comp)
+
+
+def test_gpu_searchsorted():
+    # define a function the same as `core._gpu_searchsorted_right` but
+    # without cuda.jit decorator.
+    def gpu_searchsorted_right(a, v, bfs, nlevel):
+        n = a.shape[0]
+        idx = 0
+        for level in range(nlevel):
+            if v < a[bfs[idx]]:
+                next_idx = 2 * idx + 1
+            else:
+                next_idx = 2 * idx + 2
+
+            if level == nlevel-1 or bfs[next_idx]<0:
+                if v < a[bfs[idx]]:
+                    idx = max(bfs[idx], 0)
+                else:
+                    idx = min(bfs[idx] + 1, n)
+                break
+            idx = next_idx
+
+        return idx
+
+    for n in range(1, 100):
+        a = np.sort(np.random.rand(n))
+        bfs = core._bfs_indices(n, fill_value=-1)
+        nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
+        for i in range(n):
+            v = a[i]
+            npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+
+            v = a[i] + 0.001
+            npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))

From 4948667e38c3b76c1421f4ccf0aedf05c9d82f96 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 01:13:04 -0600
Subject: [PATCH 118/151] Correct format

---
 stumpy/core.py     |  2 +-
 tests/test_core.py | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 200980648..3245bd216 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2640,7 +2640,7 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel):
         else:
             next_idx = 2 * idx + 2
 
-        if level == nlevel-1 or bfs[next_idx]<0:
+        if level == nlevel - 1 or bfs[next_idx] < 0:
             if v < a[bfs[idx]]:
                 idx = max(bfs[idx], 0)
             else:
diff --git a/tests/test_core.py b/tests/test_core.py
index 7423718ab..152a58a01 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1100,7 +1100,7 @@ def gpu_searchsorted_right(a, v, bfs, nlevel):
             else:
                 next_idx = 2 * idx + 2
 
-            if level == nlevel-1 or bfs[next_idx]<0:
+            if level == nlevel - 1 or bfs[next_idx] < 0:
                 if v < a[bfs[idx]]:
                     idx = max(bfs[idx], 0)
                 else:
@@ -1116,7 +1116,13 @@ def gpu_searchsorted_right(a, v, bfs, nlevel):
         nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
         for i in range(n):
             v = a[i]
-            npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+            npt.assert_almost_equal(
+                gpu_searchsorted_right(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="right"),
+            )
 
             v = a[i] + 0.001
-            npt.assert_almost_equal(gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+            npt.assert_almost_equal(
+                gpu_searchsorted_right(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="right"),
+            )

From cdd7a334ac69408a2ba6810f521b5419afc9ed02 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 01:25:55 -0600
Subject: [PATCH 119/151] Fixed minor bug

---
 stumpy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 3245bd216..e2688459e 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -7,7 +7,7 @@
 import inspect
 
 import numpy as np
-from numba import njit, prange
+from numba import cuda, njit, prange
 from scipy.signal import convolve
 from scipy.ndimage import maximum_filter1d, minimum_filter1d
 from scipy import linalg

From 71ade4772dce47a9765c8f5081a02b523d8501fd Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 11:52:57 -0600
Subject: [PATCH 120/151] Fixed the name of a variable

---
 stumpy/gpu_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 0d76e19b6..1b82707fb 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -319,7 +319,7 @@ def _gpu_stump(
     Note that left and right matrix profiles are only available for self-joins.
     """
     threads_per_block = config.STUMPY_THREADS_PER_BLOCK
-    blocks_per_grid = math.ceil(k / threads_per_block)
+    blocks_per_grid = math.ceil(profile_len / threads_per_block)
 
     T_A = np.load(T_A_fname, allow_pickle=False)
     T_B = np.load(T_B_fname, allow_pickle=False)

From ac472fc2331f6ef03e2fb5b08fb0c05090d15341 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 15:02:19 -0600
Subject: [PATCH 121/151] Fixed grammatical error in docstring

---
 stumpy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index e2688459e..9a7b1012b 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2624,7 +2624,7 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel):
         search tree followed by a breadth first (level order) search.
 
     nlevel : int
-        the number of levels in the binary search tree based from which the array
+        the number of levels in the binary search tree from which the array
         `bfs` is obtained.
 
     Returns

From fc149e688ce2f1bdac409ec66b7376a881edcd21 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 15:09:01 -0600
Subject: [PATCH 122/151] Use device function for searchsorting

---
 stumpy/gpu_stump.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 1b82707fb..d8ad43fe8 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -17,7 +17,7 @@
 
 @cuda.jit(
     "(i8, f8[:], f8[:], i8,  f8[:], f8[:], f8[:], f8[:], f8[:],"
-    "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i2)"
+    "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i2)"
 )
 def _compute_and_update_PI_kernel(
     i,
@@ -41,6 +41,8 @@ def _compute_and_update_PI_kernel(
     indices_L,
     indices_R,
     compute_QT,
+    bfs,
+    nlevel,
     k,
 ):
     """
@@ -116,6 +118,14 @@ def _compute_and_update_PI_kernel(
     compute_QT : bool
         A boolean flag for whether or not to compute QT
 
+    bfs : numpy.ndarray
+        the level order indices from the implicit construction of a binary
+        search tree followed by a breadth first (level order) search.
+
+    nlevel : int
+        the number of levels in the binary search tree from which the array
+        `bfs` is obtained.
+
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
         Note that this will increase the total computational time and memory usage
@@ -182,13 +192,12 @@ def _compute_and_update_PI_kernel(
                 profile_R[j] = p_norm
                 indices_R[j] = i
 
-        for idx in range(k, -1, -1):
-            if (p_norm < profile[j, idx - 1]) and (idx > 0):
-                profile[j, idx - 1] = profile[j, idx - 2]
-                indices[j, idx - 1] = indices[j, idx - 2]
-            else:
-                break
-        if idx < k:
+        if p_norm < profile[j, -1]:
+            idx = core._gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel)
+            for g in range(k - 1, idx, -1):
+                profile[j, g] = profile[j, g - 1]
+                indices[j, g] = indices[j, g - 1]
+
             profile[j, idx] = p_norm
             indices[j, idx] = i
 
@@ -318,6 +327,10 @@ def _gpu_stump(
 
     Note that left and right matrix profiles are only available for self-joins.
     """
+    bfs = core._bfs_indices(k, fill_value=-1)
+    nlevel = np.floor(np.log2(k) + 1).astype(np.int64)  # number of levels in
+    # binary seearch tree from which `bfs` is constructed.
+
     threads_per_block = config.STUMPY_THREADS_PER_BLOCK
     blocks_per_grid = math.ceil(profile_len / threads_per_block)
 
@@ -384,6 +397,8 @@ def _gpu_stump(
             device_indices_L,
             device_indices_R,
             False,
+            bfs,
+            nlevel,
             k,
         )
 
@@ -410,6 +425,8 @@ def _gpu_stump(
                 device_indices_L,
                 device_indices_R,
                 True,
+                bfs,
+                nlevel,
                 k,
             )
 

From 7ac67a8302ddcbd0d3affc0538b891fe19a92b17 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 15:16:31 -0600
Subject: [PATCH 123/151] Correct style

---
 stumpy/core.py      | 2 +-
 stumpy/gpu_stump.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 9a7b1012b..101813759 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2609,7 +2609,7 @@ def _merge_topk_PI(PA, PB, IA, IB):
 @cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True)
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
-    a device function in replace of numpy.searchsorted(a, v, side='right')
+    Device function to replace numpy.searchsorted(a, v, side='right')
 
     Parameters
     ----------
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index d8ad43fe8..1a379eda0 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -17,7 +17,8 @@
 
 @cuda.jit(
     "(i8, f8[:], f8[:], i8,  f8[:], f8[:], f8[:], f8[:], f8[:],"
-    "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i2)"
+    "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:],"
+    "b1, i8[:], i8, i2)"
 )
 def _compute_and_update_PI_kernel(
     i,

From 92467e24387e490b0289a37738261904ce3148d7 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 17:27:06 -0600
Subject: [PATCH 124/151] Remove signature from cuda device function

---
 stumpy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 101813759..f342d888e 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2606,7 +2606,7 @@ def _merge_topk_PI(PA, PB, IA, IB):
                 stop += 1  # because of shifting elements to the right by one
 
 
-@cuda.jit("i8(f8[:], f8, i8[:], i8)", device=True)
+@cuda.jit(device=True)
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
     Device function to replace numpy.searchsorted(a, v, side='right')

From bdfb258ea5e516f1c064141fe3d1d15dc895b858 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 20:37:23 -0600
Subject: [PATCH 125/151] Full Coverage confirmed


From bb5de99711bd580b77cc407cc5091ace97839c5c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Sat, 28 May 2022 20:45:12 -0600
Subject: [PATCH 126/151] revising the definition of parameter bfs in docstring

---
 stumpy/core.py      | 4 ++--
 stumpy/gpu_stump.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index f342d888e..54eb29e4c 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2620,8 +2620,8 @@ def _gpu_searchsorted_right(a, v, bfs, nlevel):
         value to insert into array `a`
 
     bfs : numpy.ndarray
-        the level order indices from the implicit construction of a binary
-        search tree followed by a breadth first (level order) search.
+        The breadth-first-search indices where the missing leaves of its corresponding
+        binary search tree are filled with -1.
 
     nlevel : int
         the number of levels in the binary search tree from which the array
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 1a379eda0..d8d877078 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -120,8 +120,8 @@ def _compute_and_update_PI_kernel(
         A boolean flag for whether or not to compute QT
 
     bfs : numpy.ndarray
-        the level order indices from the implicit construction of a binary
-        search tree followed by a breadth first (level order) search.
+        The breadth-first-search indices where the missing leaves of its corresponding
+        binary search tree are filled with -1.
 
     nlevel : int
         the number of levels in the binary search tree from which the array

From a005a415482dbce75a6030a5e0a3e98118cad333 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 30 May 2022 01:56:24 -0600
Subject: [PATCH 127/151] Copy array into device memory before passing it to
 kernel function

---
 stumpy/gpu_stump.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index d8d877078..a7682f52f 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -328,10 +328,6 @@ def _gpu_stump(
 
     Note that left and right matrix profiles are only available for self-joins.
     """
-    bfs = core._bfs_indices(k, fill_value=-1)
-    nlevel = np.floor(np.log2(k) + 1).astype(np.int64)  # number of levels in
-    # binary seearch tree from which `bfs` is constructed.
-
     threads_per_block = config.STUMPY_THREADS_PER_BLOCK
     blocks_per_grid = math.ceil(profile_len / threads_per_block)
 
@@ -344,6 +340,11 @@ def _gpu_stump(
     μ_Q = np.load(μ_Q_fname, allow_pickle=False)
     σ_Q = np.load(σ_Q_fname, allow_pickle=False)
 
+
+    device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1))
+    nlevel = np.floor(np.log2(k) + 1).astype(np.int64)
+    # number of levels in # binary seearch tree from which `bfs` is constructed.
+
     with cuda.gpus[device_id]:
         device_T_A = cuda.to_device(T_A)
         device_QT_odd = cuda.to_device(QT)
@@ -398,7 +399,7 @@ def _gpu_stump(
             device_indices_L,
             device_indices_R,
             False,
-            bfs,
+            device_bfs,
             nlevel,
             k,
         )
@@ -426,7 +427,7 @@ def _gpu_stump(
                 device_indices_L,
                 device_indices_R,
                 True,
-                bfs,
+                device_bfs,
                 nlevel,
                 k,
             )

From ade9bb4f37295d1c0ae831672356a27ce625ff31 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 30 May 2022 16:58:35 -0600
Subject: [PATCH 128/151] use float values for generating arrays

---
 tests/test_core.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 152a58a01..e25d6a664 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1065,11 +1065,14 @@ def test_merge_topk_PI():
     n = 50
     k = 5
 
-    PA = np.random.randint(0, 5, size=(n, k))
-    PA = np.sort(PA)
+    PA = np.random.rand(n * k).reshape(n, k)
+    PA = np.sort(PA, axis=1)
 
-    PB = np.random.randint(0, 5, size=(n, k))
-    PB = np.sort(PB)
+    PB = np.random.rand(n * k).reshape(n, k)
+    col_idx = np.random.randint(0, k, size=n)
+    for i in range(n):
+        PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False)
+    PB = np.sort(PB, axis=1)
 
     IA = np.arange(n * k).reshape(n, k)
     IB = IA + n * k
@@ -1083,9 +1086,8 @@ def test_merge_topk_PI():
     naive.merge_topk_PI(ref_P, PB, ref_I, IB)
     core._merge_topk_PI(comp_P, PB, comp_I, IB)
 
-    ref = np.column_stack((ref_P, ref_I))
-    comp = np.column_stack((comp_P, comp_I))
-    npt.assert_array_equal(ref, comp)
+    npt.assert_array_equal(ref_P, comp_P)
+    npt.assert_array_equal(ref_I, comp_I)
 
 
 def test_gpu_searchsorted():

From 853c2ec805e37b7839983856f9ba0e882da3730a Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 06:27:54 -0600
Subject: [PATCH 129/151] move device function to gpu_stump module

---
 stumpy/core.py      | 45 ---------------------------------------------
 stumpy/gpu_stump.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 54eb29e4c..0ebb5ae50 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -2604,48 +2604,3 @@ def _merge_topk_PI(PA, PB, IA, IB):
 
                 start = idx
                 stop += 1  # because of shifting elements to the right by one
-
-
-@cuda.jit(device=True)
-def _gpu_searchsorted_right(a, v, bfs, nlevel):
-    """
-    Device function to replace numpy.searchsorted(a, v, side='right')
-
-    Parameters
-    ----------
-    a : numpy.ndarray
-        1-dim array sorted in ascending order.
-
-    v : float
-        value to insert into array `a`
-
-    bfs : numpy.ndarray
-        The breadth-first-search indices where the missing leaves of its corresponding
-        binary search tree are filled with -1.
-
-    nlevel : int
-        the number of levels in the binary search tree from which the array
-        `bfs` is obtained.
-
-    Returns
-    -------
-    idx : int
-        the index of the insertion point
-    """
-    n = a.shape[0]
-    idx = 0
-    for level in range(nlevel):
-        if v < a[bfs[idx]]:
-            next_idx = 2 * idx + 1
-        else:
-            next_idx = 2 * idx + 2
-
-        if level == nlevel - 1 or bfs[next_idx] < 0:
-            if v < a[bfs[idx]]:
-                idx = max(bfs[idx], 0)
-            else:
-                idx = min(bfs[idx] + 1, n)
-            break
-        idx = next_idx
-
-    return idx
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index a7682f52f..ec6db99d3 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -15,6 +15,51 @@
 logger = logging.getLogger(__name__)
 
 
+@cuda.jit(device=True)
+def _gpu_searchsorted_right(a, v, bfs, nlevel):
+    """
+    Device function to replace numpy.searchsorted(a, v, side='right')
+
+    Parameters
+    ----------
+    a : numpy.ndarray
+        1-dim array sorted in ascending order.
+
+    v : float
+        value to insert into array `a`
+
+    bfs : numpy.ndarray
+        The breadth-first-search indices where the missing leaves of its corresponding
+        binary search tree are filled with -1.
+
+    nlevel : int
+        the number of levels in the binary search tree from which the array
+        `bfs` is obtained.
+
+    Returns
+    -------
+    idx : int
+        the index of the insertion point
+    """
+    n = a.shape[0]
+    idx = 0
+    for level in range(nlevel):
+        if v < a[bfs[idx]]:
+            next_idx = 2 * idx + 1
+        else:
+            next_idx = 2 * idx + 2
+
+        if level == nlevel - 1 or bfs[next_idx] < 0:
+            if v < a[bfs[idx]]:
+                idx = max(bfs[idx], 0)
+            else:
+                idx = min(bfs[idx] + 1, n)
+            break
+        idx = next_idx
+
+    return idx
+
+
 @cuda.jit(
     "(i8, f8[:], f8[:], i8,  f8[:], f8[:], f8[:], f8[:], f8[:],"
     "f8[:], f8[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:],"

From e3b5119246a964bb46e560a525832fe68b397bf4 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 06:29:33 -0600
Subject: [PATCH 130/151] Add gpu_searchsorted_left for the sake of completeness

---
 stumpy/gpu_stump.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index ec6db99d3..c7f7aec16 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -15,6 +15,51 @@
 logger = logging.getLogger(__name__)
 
 
+@cuda.jit(device=True)
+def _gpu_searchsorted_left(a, v, bfs, nlevel):
+    """
+    Device function to replace numpy.searchsorted(a, v, side='left')
+
+    Parameters
+    ----------
+    a : numpy.ndarray
+        1-dim array sorted in ascending order.
+
+    v : float
+        value to insert into array `a`
+
+    bfs : numpy.ndarray
+        The breadth-first-search indices where the missing leaves of its corresponding
+        binary search tree are filled with -1.
+
+    nlevel : int
+        the number of levels in the binary search tree from which the array
+        `bfs` is obtained.
+
+    Returns
+    -------
+    idx : int
+        the index of the insertion point
+    """
+    n = a.shape[0]
+    idx = 0
+    for level in range(nlevel):
+        if v <= a[bfs[idx]]:
+            next_idx = 2 * idx + 1
+        else:
+            next_idx = 2 * idx + 2
+
+        if level == nlevel - 1 or bfs[next_idx] < 0:
+            if v <= a[bfs[idx]]:
+                idx = max(bfs[idx], 0)
+            else:
+                idx = min(bfs[idx] + 1, n)
+            break
+        idx = next_idx
+
+    return idx
+
+
 @cuda.jit(device=True)
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """

From c5779e551e2288f3db60ea93d9293cf60a70c2bd Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 06:46:04 -0600
Subject: [PATCH 131/151] Move test function to test_gpu_stump

---
 tests/test_core.py      | 40 ----------------------------------------
 tests/test_gpu_stump.py | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 40 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index e25d6a664..528286061 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1088,43 +1088,3 @@ def test_merge_topk_PI():
 
     npt.assert_array_equal(ref_P, comp_P)
     npt.assert_array_equal(ref_I, comp_I)
-
-
-def test_gpu_searchsorted():
-    # define a function the same as `core._gpu_searchsorted_right` but
-    # without cuda.jit decorator.
-    def gpu_searchsorted_right(a, v, bfs, nlevel):
-        n = a.shape[0]
-        idx = 0
-        for level in range(nlevel):
-            if v < a[bfs[idx]]:
-                next_idx = 2 * idx + 1
-            else:
-                next_idx = 2 * idx + 2
-
-            if level == nlevel - 1 or bfs[next_idx] < 0:
-                if v < a[bfs[idx]]:
-                    idx = max(bfs[idx], 0)
-                else:
-                    idx = min(bfs[idx] + 1, n)
-                break
-            idx = next_idx
-
-        return idx
-
-    for n in range(1, 100):
-        a = np.sort(np.random.rand(n))
-        bfs = core._bfs_indices(n, fill_value=-1)
-        nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
-        for i in range(n):
-            v = a[i]
-            npt.assert_almost_equal(
-                gpu_searchsorted_right(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="right"),
-            )
-
-            v = a[i] + 0.001
-            npt.assert_almost_equal(
-                gpu_searchsorted_right(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="right"),
-            )
diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 1a2662647..dfbf5e405 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -38,6 +38,24 @@ def test_gpu_stump_int_input():
     with pytest.raises(TypeError):
         gpu_stump(np.arange(10), 5, ignore_trivial=True)
 
+def test_gpu_searchsorted():
+    for n in range(1, 100):
+        a = np.sort(np.random.rand(n))
+        bfs = core._bfs_indices(n, fill_value=-1)
+        nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
+        for i in range(n):
+             v = a[i] - 0.001
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+
+            v = a[i]
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+
+            v = a[i] + 0.001
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
+            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+
 
 @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
 @pytest.mark.parametrize("T_A, T_B", test_data)

From 38e531c34a63e3f4a98f476c9501b705de2a2b29 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 06:50:47 -0600
Subject: [PATCH 132/151] correct format

---
 stumpy/core.py          |  2 +-
 stumpy/gpu_stump.py     |  1 -
 tests/test_gpu_stump.py | 35 +++++++++++++++++++++++++++--------
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/stumpy/core.py b/stumpy/core.py
index 0ebb5ae50..535471761 100644
--- a/stumpy/core.py
+++ b/stumpy/core.py
@@ -7,7 +7,7 @@
 import inspect
 
 import numpy as np
-from numba import cuda, njit, prange
+from numba import njit, prange
 from scipy.signal import convolve
 from scipy.ndimage import maximum_filter1d, minimum_filter1d
 from scipy import linalg
diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index c7f7aec16..22748e089 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -430,7 +430,6 @@ def _gpu_stump(
     μ_Q = np.load(μ_Q_fname, allow_pickle=False)
     σ_Q = np.load(σ_Q_fname, allow_pickle=False)
 
-
     device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1))
     nlevel = np.floor(np.log2(k) + 1).astype(np.int64)
     # number of levels in # binary seearch tree from which `bfs` is constructed.
diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index dfbf5e405..1e79fb577 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -1,7 +1,7 @@
 import numpy as np
 import numpy.testing as npt
 import pandas as pd
-from stumpy import gpu_stump
+from stumpy import core, gpu_stump
 from stumpy import config
 from numba import cuda
 
@@ -38,23 +38,42 @@ def test_gpu_stump_int_input():
     with pytest.raises(TypeError):
         gpu_stump(np.arange(10), 5, ignore_trivial=True)
 
+
 def test_gpu_searchsorted():
     for n in range(1, 100):
         a = np.sort(np.random.rand(n))
         bfs = core._bfs_indices(n, fill_value=-1)
         nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
         for i in range(n):
-             v = a[i] - 0.001
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+            v = a[i] - 0.001
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="left"),
+            )
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="right"),
+            )
 
             v = a[i]
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="left"),
+            )
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="right"),
+            )
 
             v = a[i] + 0.001
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel), np.searchsorted(a, v, side="left"))
-            npt.assert_almost_equal(gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel), np.searchsorted(a, v, side="right"))
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="left"),
+            )
+            npt.assert_almost_equal(
+                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                np.searchsorted(a, v, side="right"),
+            )
 
 
 @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)

From 5a7b3c099419de1a09368f2930eabce410730693 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 08:26:58 -0600
Subject: [PATCH 133/151] Fixed calling function

---
 stumpy/gpu_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 22748e089..bf7e3b57d 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -284,7 +284,7 @@ def _compute_and_update_PI_kernel(
                 indices_R[j] = i
 
         if p_norm < profile[j, -1]:
-            idx = core._gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel)
+            idx = _gpu_searchsorted_right(profile[j], p_norm, bfs, nlevel)
             for g in range(k - 1, idx, -1):
                 profile[j, g] = profile[j, g - 1]
                 indices[j, g] = indices[j, g - 1]

From e1b0d205e463fd2e02a906ab349ca492d303be27 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 08:32:25 -0600
Subject: [PATCH 134/151] Make function callable from both CPU and GPU

to avoid duplication for unit testing.
---
 stumpy/gpu_stump.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index bf7e3b57d..99a3ba839 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -7,7 +7,7 @@
 import os
 
 import numpy as np
-from numba import cuda
+from numba import cuda, jit
 
 from . import core, config
 from .gpu_aamp import gpu_aamp
@@ -15,10 +15,11 @@
 logger = logging.getLogger(__name__)
 
 
-@cuda.jit(device=True)
+@jit # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_left(a, v, bfs, nlevel):
     """
-    Device function to replace numpy.searchsorted(a, v, side='left')
+    A function equivalent to numpy.searchsorted(a, v, side='left'), designed
+    to be used mainly as device function
 
     Parameters
     ----------
@@ -60,7 +61,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel):
     return idx
 
 
-@cuda.jit(device=True)
+@jit # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
     Device function to replace numpy.searchsorted(a, v, side='right')

From 922544c3ae21d018db7600d4b466a2ae40d107fd Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 08:46:07 -0600
Subject: [PATCH 135/151] Fixed calling function

---
 tests/test_gpu_stump.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 1e79fb577..108ac0d91 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -2,6 +2,7 @@
 import numpy.testing as npt
 import pandas as pd
 from stumpy import core, gpu_stump
+from stumpy.gpu_stump import _gpu_searchsorted_left, _gpu_searchsorted_right
 from stumpy import config
 from numba import cuda
 
@@ -47,31 +48,31 @@ def test_gpu_searchsorted():
         for i in range(n):
             v = a[i] - 0.001
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                _gpu_searchsorted_left(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="left"),
             )
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                _gpu_searchsorted_right(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="right"),
             )
 
             v = a[i]
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                _gpu_searchsorted_left(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="left"),
             )
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                _gpu_searchsorted_right(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="right"),
             )
 
             v = a[i] + 0.001
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_left(a, v, bfs, nlevel),
+                _gpu_searchsorted_left(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="left"),
             )
             npt.assert_almost_equal(
-                gpu_stump._gpu_searchsorted_right(a, v, bfs, nlevel),
+                _gpu_searchsorted_right(a, v, bfs, nlevel),
                 np.searchsorted(a, v, side="right"),
             )
 

From 102979b1235e00744567484b97a774658d3b2e1d Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 08:51:31 -0600
Subject: [PATCH 136/151] Revised the test function for merge_topk_PI

---
 tests/test_core.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 528286061..a297dd3fa 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1066,11 +1066,12 @@ def test_merge_topk_PI():
     k = 5
 
     PA = np.random.rand(n * k).reshape(n, k)
-    PA = np.sort(PA, axis=1)
+    PA = np.sort(PA, axis=1)  # sorting each row separately
 
     PB = np.random.rand(n * k).reshape(n, k)
+
     col_idx = np.random.randint(0, k, size=n)
-    for i in range(n):
+    for i in range(n): # creating ties between values of PA and PB
         PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False)
     PB = np.sort(PB, axis=1)
 

From a8aecf6679a9dbb02be80cdf75cf55ce99ae6aae Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 09:18:04 -0600
Subject: [PATCH 137/151] Revise docstrings

---
 stumpy/gpu_stump.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 99a3ba839..35bf3f12f 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -18,7 +18,7 @@
 @jit # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_left(a, v, bfs, nlevel):
     """
-    A function equivalent to numpy.searchsorted(a, v, side='left'), designed
+    Equivalent to numpy.searchsorted(a, v, side='left'), designed
     to be used mainly as device function
 
     Parameters
@@ -64,7 +64,8 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel):
 @jit # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
-    Device function to replace numpy.searchsorted(a, v, side='right')
+    Equivalent to numpy.searchsorted(a, v, side='left'), designed
+    to be used mainly as device function
 
     Parameters
     ----------

From 38318ecdb8ab602d8ceb9d8afe4d1abc1b6ed9ed Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 09:19:31 -0600
Subject: [PATCH 138/151] Rename variable

---
 stumpy/gpu_stump.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 35bf3f12f..9fb657668 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -124,7 +124,7 @@ def _compute_and_update_PI_kernel(
     Σ_T,
     μ_Q,
     σ_Q,
-    profile_len,
+    w,
     ignore_trivial,
     excl_zone,
     profile,
@@ -179,7 +179,7 @@ def _compute_and_update_PI_kernel(
     σ_Q : numpy.ndarray
         Standard deviation of the query sequence, `Q`
 
-    profile_len : int
+    w : int
         The total number of sliding windows to iterate over
 
     ignore_trivial : bool
@@ -247,7 +247,7 @@ def _compute_and_update_PI_kernel(
 
     for j in range(start, QT_out.shape[0], stride):
         zone_start = max(0, j - excl_zone)
-        zone_stop = min(profile_len, j + excl_zone)
+        zone_stop = min(w, j + excl_zone)
 
         if compute_QT:
             QT_out[j] = (
@@ -307,7 +307,7 @@ def _gpu_stump(
     QT_first_fname,
     μ_Q_fname,
     σ_Q_fname,
-    profile_len,
+    w,
     ignore_trivial=True,
     range_start=1,
     device_id=0,
@@ -362,7 +362,7 @@ def _gpu_stump(
         The file name for the standard deviation of the query sequence, `Q`,
         relative to the current sliding window
 
-    profile_len : int
+    w : int
         The total number of sliding windows to iterate over
 
     ignore_trivial : bool
@@ -421,7 +421,7 @@ def _gpu_stump(
     Note that left and right matrix profiles are only available for self-joins.
     """
     threads_per_block = config.STUMPY_THREADS_PER_BLOCK
-    blocks_per_grid = math.ceil(profile_len / threads_per_block)
+    blocks_per_grid = math.ceil(w / threads_per_block)
 
     T_A = np.load(T_A_fname, allow_pickle=False)
     T_B = np.load(T_B_fname, allow_pickle=False)
@@ -452,14 +452,14 @@ def _gpu_stump(
             device_M_T = cuda.to_device(M_T)
             device_Σ_T = cuda.to_device(Σ_T)
 
-        profile = np.full((profile_len, k), np.inf, dtype=np.float64)
-        indices = np.full((profile_len, k), -1, dtype=np.int64)
+        profile = np.full((w, k), np.inf, dtype=np.float64)
+        indices = np.full((w, k), -1, dtype=np.int64)
 
-        profile_L = np.full(profile_len, np.inf, dtype=np.float64)
-        indices_L = np.full(profile_len, -1, dtype=np.int64)
+        profile_L = np.full(w, np.inf, dtype=np.float64)
+        indices_L = np.full(w, -1, dtype=np.int64)
 
-        profile_R = np.full(profile_len, np.inf, dtype=np.float64)
-        indices_R = np.full(profile_len, -1, dtype=np.int64)
+        profile_R = np.full(w, np.inf, dtype=np.float64)
+        indices_R = np.full(w, -1, dtype=np.int64)
 
         device_profile = cuda.to_device(profile)
         device_profile_L = cuda.to_device(profile_L)
@@ -480,7 +480,7 @@ def _gpu_stump(
             device_Σ_T,
             device_μ_Q,
             device_σ_Q,
-            profile_len,
+            w,
             ignore_trivial,
             excl_zone,
             device_profile,
@@ -508,7 +508,7 @@ def _gpu_stump(
                 device_Σ_T,
                 device_μ_Q,
                 device_σ_Q,
-                profile_len,
+                w,
                 ignore_trivial,
                 excl_zone,
                 device_profile,
@@ -695,7 +695,7 @@ def gpu_stump(
         logger.warning("Try setting `ignore_trivial = False`.")
 
     n = T_B.shape[0]
-    profile_len = T_A.shape[0] - m + 1
+    w = T_A.shape[0] - m + 1
     l = n - m + 1
     excl_zone = int(
         np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
@@ -765,7 +765,7 @@ def gpu_stump(
                     QT_first_fname,
                     μ_Q_fname,
                     σ_Q_fname,
-                    profile_len,
+                    w,
                     ignore_trivial,
                     start + 1,
                     device_ids[idx],
@@ -794,7 +794,7 @@ def gpu_stump(
                 QT_first_fname,
                 μ_Q_fname,
                 σ_Q_fname,
-                profile_len,
+                w,
                 ignore_trivial,
                 start + 1,
                 device_ids[idx],
@@ -866,7 +866,7 @@ def gpu_stump(
         indices_R[0] = np.where(cond, indices_R[0], indices_R[i])
 
     out = np.empty(
-        (profile_len, 2 * k + 2), dtype=object
+        (w, 2 * k + 2), dtype=object
     )  # last two columns are to store
     # (top-1) left/right matrix profile indices
     out[:, :k] = profile[0]

From 76f97cbb896f0d66819022cb0acfc43e011d67c0 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 31 May 2022 14:38:11 -0600
Subject: [PATCH 139/151] Corrected format

---
 stumpy/gpu_stump.py | 8 +++-----
 tests/test_core.py  | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 9fb657668..371bbeaa4 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -15,7 +15,7 @@
 logger = logging.getLogger(__name__)
 
 
-@jit # equivalent to `__host__ __device__` in C++ CUDA
+@jit  # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_left(a, v, bfs, nlevel):
     """
     Equivalent to numpy.searchsorted(a, v, side='left'), designed
@@ -61,7 +61,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel):
     return idx
 
 
-@jit # equivalent to `__host__ __device__` in C++ CUDA
+@jit  # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
     Equivalent to numpy.searchsorted(a, v, side='left'), designed
@@ -865,9 +865,7 @@ def gpu_stump(
         profile_R[0] = np.where(cond, profile_R[0], profile_R[i])
         indices_R[0] = np.where(cond, indices_R[0], indices_R[i])
 
-    out = np.empty(
-        (w, 2 * k + 2), dtype=object
-    )  # last two columns are to store
+    out = np.empty((w, 2 * k + 2), dtype=object)  # last two columns are to store
     # (top-1) left/right matrix profile indices
     out[:, :k] = profile[0]
     out[:, k:] = np.column_stack((indices[0], indices_L[0], indices_R[0]))
diff --git a/tests/test_core.py b/tests/test_core.py
index a297dd3fa..63a33d1d0 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1071,7 +1071,7 @@ def test_merge_topk_PI():
     PB = np.random.rand(n * k).reshape(n, k)
 
     col_idx = np.random.randint(0, k, size=n)
-    for i in range(n): # creating ties between values of PA and PB
+    for i in range(n):  # creating ties between values of PA and PB
         PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False)
     PB = np.sort(PB, axis=1)
 

From d45733cc1d2b9bc59d31714860869e8516038c77 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 01:40:17 -0600
Subject: [PATCH 140/151] Fixed typo

---
 stumpy/gpu_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index 371bbeaa4..d6e02f669 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -64,7 +64,7 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel):
 @jit  # equivalent to `__host__ __device__` in C++ CUDA
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
-    Equivalent to numpy.searchsorted(a, v, side='left'), designed
+    Equivalent to numpy.searchsorted(a, v, side='right'), designed
     to be used mainly as device function
 
     Parameters

From b2db50585f64eb0d0a9ddc5fb6bdbdc9fb4e0011 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 03:03:09 -0600
Subject: [PATCH 141/151] change decorator so that the function can be used
 ONLY as device function

---
 stumpy/gpu_stump.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index d6e02f669..e67a0a362 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -7,7 +7,7 @@
 import os
 
 import numpy as np
-from numba import cuda, jit
+from numba import cuda
 
 from . import core, config
 from .gpu_aamp import gpu_aamp
@@ -15,11 +15,11 @@
 logger = logging.getLogger(__name__)
 
 
-@jit  # equivalent to `__host__ __device__` in C++ CUDA
+@cuda.jit(device=True)
 def _gpu_searchsorted_left(a, v, bfs, nlevel):
     """
     Equivalent to numpy.searchsorted(a, v, side='left'), designed
-    to be used mainly as device function
+    to be used as device function
 
     Parameters
     ----------
@@ -61,11 +61,11 @@ def _gpu_searchsorted_left(a, v, bfs, nlevel):
     return idx
 
 
-@jit  # equivalent to `__host__ __device__` in C++ CUDA
+@cuda.jit(device=True)
 def _gpu_searchsorted_right(a, v, bfs, nlevel):
     """
     Equivalent to numpy.searchsorted(a, v, side='right'), designed
-    to be used mainly as device function
+    to be used as device function
 
     Parameters
     ----------

From b023b20dc0934d821a6cbfdb192597eb5f6ce361 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 14:08:39 -0600
Subject: [PATCH 142/151] considered more than one value for parameter k in
 unit testing

---
 tests/test_gpu_stump.py | 18 +++++++++---------
 tests/test_stump.py     | 18 +++++++++---------
 tests/test_stumped.py   | 12 ++++++------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 108ac0d91..02a415541 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -393,15 +393,15 @@ def test_gpu_stump_nan_zero_mean_self_join():
 @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_gpu_stump_self_join_KNN(T_A, T_B):
-    k = 3
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k)
-    comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
+    for k in range(1, 4):
+        ref_mp = naive.stump(T_B, m, exclusion_zone=zone, row_wise=True, k=k)
+        comp_mp = gpu_stump(T_B, m, ignore_trivial=True, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
 
-    comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
+        comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stump.py b/tests/test_stump.py
index af2a2315e..d8f0983ee 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -244,15 +244,15 @@ def test_stump_nan_zero_mean_self_join():
 
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stump_self_join_KNN(T_A, T_B):
-    k = 3
     m = 3
     zone = int(np.ceil(m / 4))
-    ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
-    comp_mp = stump(T_B, m, ignore_trivial=True, k=k)
-    naive.replace_inf(ref_mp)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
+    for k in range(1, 4):
+        ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+        comp_mp = stump(T_B, m, ignore_trivial=True, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
 
-    comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
-    naive.replace_inf(comp_mp)
-    npt.assert_almost_equal(ref_mp, comp_mp)
+        comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stumped.py b/tests/test_stumped.py
index 02e914436..168a5f570 100644
--- a/tests/test_stumped.py
+++ b/tests/test_stumped.py
@@ -617,11 +617,11 @@ def test_stumped_two_subsequences_nan_inf_A_B_join_swap(
 @pytest.mark.parametrize("T_A, T_B", test_data)
 def test_stumped_self_join_KNN(T_A, T_B, dask_cluster):
     with Client(dask_cluster) as dask_client:
-        k = 3
         m = 3
         zone = int(np.ceil(m / 4))
-        ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
-        comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k)
-        naive.replace_inf(ref_mp)
-        naive.replace_inf(comp_mp)
-        npt.assert_almost_equal(ref_mp, comp_mp)
+        for k in range(1, 4):
+            ref_mp = naive.stump(T_B, m, exclusion_zone=zone, k=k)
+            comp_mp = stumped(dask_client, T_B, m, ignore_trivial=True, k=k)
+            naive.replace_inf(ref_mp)
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)

From 2e3483461b31441e33bbfca74c17aca04e100cf3 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 16:12:05 -0600
Subject: [PATCH 143/151] add test for A_B_join_KNN

---
 tests/test_gpu_stump.py | 12 ++++++++++++
 tests/test_stump.py     | 15 +++++++++++++++
 tests/test_stumped.py   | 16 ++++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 02a415541..688592a22 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -405,3 +405,15 @@ def test_gpu_stump_self_join_KNN(T_A, T_B):
         comp_mp = gpu_stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
         naive.replace_inf(comp_mp)
         npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_gpu_stump_A_B_join_KNN(T_A, T_B):
+    m = 3
+    for k in range(1, 4):
+        ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True, k=k)
+        comp_mp = gpu_stump(T_B, m, T_A, ignore_trivial=False, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stump.py b/tests/test_stump.py
index d8f0983ee..3e0b34299 100644
--- a/tests/test_stump.py
+++ b/tests/test_stump.py
@@ -256,3 +256,18 @@ def test_stump_self_join_KNN(T_A, T_B):
         comp_mp = stump(pd.Series(T_B), m, ignore_trivial=True, k=k)
         naive.replace_inf(comp_mp)
         npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stump_A_B_join_KNN(T_A, T_B):
+    m = 3
+    for k in range(1, 4):
+        ref_mp = naive.stump(T_A, m, T_B=T_B, k=k)
+        comp_mp = stump(T_A, m, T_B, ignore_trivial=False, k=k)
+        naive.replace_inf(ref_mp)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
+
+        comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k)
+        naive.replace_inf(comp_mp)
+        npt.assert_almost_equal(ref_mp, comp_mp)
diff --git a/tests/test_stumped.py b/tests/test_stumped.py
index 168a5f570..7e8b053d3 100644
--- a/tests/test_stumped.py
+++ b/tests/test_stumped.py
@@ -625,3 +625,19 @@ def test_stumped_self_join_KNN(T_A, T_B, dask_cluster):
             naive.replace_inf(ref_mp)
             naive.replace_inf(comp_mp)
             npt.assert_almost_equal(ref_mp, comp_mp)
+
+
+@pytest.mark.filterwarnings("ignore:numpy.dtype size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed")
+@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed")
+@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning")
+@pytest.mark.parametrize("T_A, T_B", test_data)
+def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster):
+    with Client(dask_cluster) as dask_client:
+        m = 3
+        for k in range(1, 4):
+            ref_mp = naive.stump(T_A, m, T_B=T_B, k=k)
+            comp_mp = stumped(dask_client, T_A, m, T_B, ignore_trivial=False, k=k)
+            naive.replace_inf(ref_mp)
+            naive.replace_inf(comp_mp)
+            npt.assert_almost_equal(ref_mp, comp_mp)

From 4da736624b6310dfe2c41c90af2b053883abe0b4 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 16:15:50 -0600
Subject: [PATCH 144/151] swap TA and TB in test function so that the value of
 k becomes valid

---
 tests/test_gpu_stump.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 688592a22..178effc61 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -412,8 +412,8 @@ def test_gpu_stump_self_join_KNN(T_A, T_B):
 def test_gpu_stump_A_B_join_KNN(T_A, T_B):
     m = 3
     for k in range(1, 4):
-        ref_mp = naive.stump(T_B, m, T_B=T_A, row_wise=True, k=k)
-        comp_mp = gpu_stump(T_B, m, T_A, ignore_trivial=False, k=k)
+        ref_mp = naive.stump(T_A, m, T_B=T_B, row_wise=True, k=k)
+        comp_mp = gpu_stump(T_A, m, T_B, ignore_trivial=False, k=k)
         naive.replace_inf(ref_mp)
         naive.replace_inf(comp_mp)
         npt.assert_almost_equal(ref_mp, comp_mp)

From dcee0f9c240d765dd384a22832660380a0a4f6d0 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 16:24:32 -0600
Subject: [PATCH 145/151] Replicating commits 5565904 and 10878fd


From 7d5e7fcd46e820a405e4c347b59e4930df902b97 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 20:18:09 -0600
Subject: [PATCH 146/151] Add wrapper kernel function for testing a device
 function

Replicating commits bf6edcc, 1b7d971, 7f65b94, d282dfd, 6faa6453
---
 tests/test_gpu_stump.py | 84 ++++++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 178effc61..0a65de68f 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -40,42 +40,58 @@ def test_gpu_stump_int_input():
         gpu_stump(np.arange(10), 5, ignore_trivial=True)
 
 
+@cuda.jit("(f8[:, :], f8[:], i8[:], i8, b1, i8[:])")
+def _gpu_searchsorted_kernel(A, V, bfs, nlevel, is_left, IDX):
+    # A wrapper kernel for calling device function _gpu_searchsorted_left/right.
+    i = cuda.grid(1)
+    if i < A.shape[0]:
+        if is_left:
+            IDX[i] = _gpu_searchsorted_left(A[i], V[i], bfs, nlevel)
+        else:
+            IDX[i] = _gpu_searchsorted_right(A[i], V[i], bfs, nlevel)
+
+
 def test_gpu_searchsorted():
-    for n in range(1, 100):
-        a = np.sort(np.random.rand(n))
-        bfs = core._bfs_indices(n, fill_value=-1)
-        nlevel = np.floor(np.log2(n) + 1).astype(np.int64)
-        for i in range(n):
-            v = a[i] - 0.001
-            npt.assert_almost_equal(
-                _gpu_searchsorted_left(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="left"),
-            )
-            npt.assert_almost_equal(
-                _gpu_searchsorted_right(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="right"),
-            )
-
-            v = a[i]
-            npt.assert_almost_equal(
-                _gpu_searchsorted_left(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="left"),
-            )
-            npt.assert_almost_equal(
-                _gpu_searchsorted_right(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="right"),
-            )
-
-            v = a[i] + 0.001
-            npt.assert_almost_equal(
-                _gpu_searchsorted_left(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="left"),
-            )
-            npt.assert_almost_equal(
-                _gpu_searchsorted_right(a, v, bfs, nlevel),
-                np.searchsorted(a, v, side="right"),
-            )
+    n = 5000
+    threads_per_block = config.STUMPY_THREADS_PER_BLOCK
+    blocks_per_grid = math.ceil(n / threads_per_block)
+
+    for k in range(1, 32):
+        device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1))
+        nlevel = np.floor(np.log2(k) + 1).astype(np.int64)
+
+        A = np.sort(np.random.rand(n, k), axis=1)
+        device_A = cuda.to_device(A)
+
+        V = np.random.rand(n)
+        for i, idx in enumerate(np.random.choice(np.arange(n), size=k, replace=False)):
+            V[idx] = A[idx, i]  # create ties
+        device_V = cuda.to_device(V)
+
+        is_left = True  # test case
+        ref_IDX = [np.searchsorted(A[i], V[i], side="left") for i in range(n)]
+        ref_IDX = np.asarray(ref_IDX, dtype=np.int64)
+
+        comp_IDX = np.full(n, -1, dtype=np.int64)
+        device_comp_IDX = cuda.to_device(comp_IDX)
+        _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block](
+            device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX
+        )
+        comp_IDX = device_comp_IDX.copy_to_host()
+        npt.assert_array_equal(ref_IDX, comp_IDX)
 
+        is_left = False  # test case
+        ref_IDX = [np.searchsorted(A[i], V[i], side="right") for i in range(n)]
+        ref_IDX = np.asarray(ref_IDX, dtype=np.int64)
+
+        comp_IDX = np.full(n, -1, dtype=np.int64)
+        device_comp_IDX = cuda.to_device(comp_IDX)
+        _gpu_searchsorted_kernel[blocks_per_grid, threads_per_block](
+            device_A, device_V, device_bfs, nlevel, is_left, device_comp_IDX
+        )
+        comp_IDX = device_comp_IDX.copy_to_host()
+        npt.assert_array_equal(ref_IDX, comp_IDX)
+        
 
 @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
 @pytest.mark.parametrize("T_A, T_B", test_data)

From 862a3e810a6dd3caddd482af893bc9a08163fc5c Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 20:23:57 -0600
Subject: [PATCH 147/151] Correct format

---
 tests/test_gpu_stump.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 0a65de68f..9ac2ae484 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -91,7 +91,7 @@ def test_gpu_searchsorted():
         )
         comp_IDX = device_comp_IDX.copy_to_host()
         npt.assert_array_equal(ref_IDX, comp_IDX)
-        
+
 
 @pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning)
 @pytest.mark.parametrize("T_A, T_B", test_data)

From 90e66becfa219a10047306c07553e38fa5fe7771 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 20:26:30 -0600
Subject: [PATCH 148/151] import missing module

---
 tests/test_gpu_stump.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py
index 9ac2ae484..071337cd5 100644
--- a/tests/test_gpu_stump.py
+++ b/tests/test_gpu_stump.py
@@ -1,3 +1,4 @@
+import math
 import numpy as np
 import numpy.testing as npt
 import pandas as pd

From 461e1eb901b4db0f4f181616e7c428184bf529e6 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 22:19:01 -0600
Subject: [PATCH 149/151] testing function for more than one value for
 parameter k

Replicated commit 9789cd9
---
 tests/test_core.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 63a33d1d0..1087f999d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1063,29 +1063,27 @@ def test_select_P_ABBA_val_inf():
 
 def test_merge_topk_PI():
     n = 50
-    k = 5
+    for k in range(1, 6):
+        PA = np.random.rand(n * k).reshape(n, k)
+        PA = np.sort(PA, axis=1)  # sorting each row separately
 
-    PA = np.random.rand(n * k).reshape(n, k)
-    PA = np.sort(PA, axis=1)  # sorting each row separately
+        PB = np.random.rand(n * k).reshape(n, k)
+        col_idx = np.random.randint(0, k, size=n)
+        for i in range(n):  # creating ties between values of PA and PB
+            PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False)
+            PB = np.sort(PB, axis=1)
 
-    PB = np.random.rand(n * k).reshape(n, k)
+        IA = np.arange(n * k).reshape(n, k)
+        IB = IA + n * k
 
-    col_idx = np.random.randint(0, k, size=n)
-    for i in range(n):  # creating ties between values of PA and PB
-        PB[i, col_idx[i]] = np.random.choice(PA[i], size=1, replace=False)
-    PB = np.sort(PB, axis=1)
+        ref_P = PA.copy()
+        ref_I = IA.copy()
 
-    IA = np.arange(n * k).reshape(n, k)
-    IB = IA + n * k
+        comp_P = PA.copy()
+        comp_I = IA.copy()
 
-    ref_P = PA.copy()
-    ref_I = IA.copy()
+        naive.merge_topk_PI(ref_P, PB, ref_I, IB)
+        core._merge_topk_PI(comp_P, PB, comp_I, IB)
 
-    comp_P = PA.copy()
-    comp_I = IA.copy()
-
-    naive.merge_topk_PI(ref_P, PB, ref_I, IB)
-    core._merge_topk_PI(comp_P, PB, comp_I, IB)
-
-    npt.assert_array_equal(ref_P, comp_P)
-    npt.assert_array_equal(ref_I, comp_I)
+        npt.assert_array_equal(ref_P, comp_P)
+        npt.assert_array_equal(ref_I, comp_I)

From 1de7532d7faa34752563eaf1cf166524639989f3 Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Mon, 6 Jun 2022 23:55:33 -0600
Subject: [PATCH 150/151] Renamed function to improve readability

replicated commits 4f2ea6c and (partially) 1b7d971
---
 tests/naive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/naive.py b/tests/naive.py
index 75c69876b..fabe3d922 100644
--- a/tests/naive.py
+++ b/tests/naive.py
@@ -156,7 +156,7 @@ def stamp(T_A, m, T_B=None, exclusion_zone=None):  # pragma: no cover
     return result
 
 
-def searchsorted(a, v):
+def searchsorted_right(a, v):
     """
     Naive version of numpy.searchsorted(..., side='right')
     """
@@ -239,14 +239,14 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
             for i in iter_range:
                 D = distance_matrix[i, i + g]  # D: a single element
                 if D < P[i, k - 1]:
-                    idx = searchsorted(P[i], D)
+                    idx = searchsorted_right(P[i], D)
                     # to keep the top-k, we must get rid of the last element.
                     P[i, :k] = np.insert(P[i, :k], idx, D)[:-1]
                     I[i, :k] = np.insert(I[i, :k], idx, i + g)[:-1]
 
                 if ignore_trivial:  # Self-joins only
                     if D < P[i + g, k - 1]:
-                        idx = searchsorted(P[i + g], D)
+                        idx = searchsorted_right(P[i + g], D)
                         P[i + g, :k] = np.insert(P[i + g, :k], idx, D)[:-1]
                         I[i + g, :k] = np.insert(I[i + g, :k], idx, i)[:-1]
 

From 08c75f76cbe50e90a049e9cd612e12bf39f0352b Mon Sep 17 00:00:00 2001
From: ninimama <nimasarajpoor@gmail.com>
Date: Tue, 7 Jun 2022 01:17:04 -0600
Subject: [PATCH 151/151] Fixed typos

---
 stumpy/gpu_stump.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py
index e67a0a362..8f4683388 100644
--- a/stumpy/gpu_stump.py
+++ b/stumpy/gpu_stump.py
@@ -434,7 +434,7 @@ def _gpu_stump(
 
     device_bfs = cuda.to_device(core._bfs_indices(k, fill_value=-1))
     nlevel = np.floor(np.log2(k) + 1).astype(np.int64)
-    # number of levels in # binary seearch tree from which `bfs` is constructed.
+    # number of levels in binary search tree from which `bfs` is constructed.
 
     with cuda.gpus[device_id]:
         device_T_A = cuda.to_device(T_A)
@@ -855,12 +855,12 @@ def gpu_stump(
         # Update (top-k) matrix profile and matrix profile indices
         core._merge_topk_PI(profile[0], profile[i], indices[0], indices[i])
 
-        # Update (top-1) left matrix profile and matrix profil indices
+        # Update (top-1) left matrix profile and matrix profile indices
         cond = profile_L[0] < profile_L[i]
         profile_L[0] = np.where(cond, profile_L[0], profile_L[i])
         indices_L[0] = np.where(cond, indices_L[0], indices_L[i])
 
-        # Update (top-1) right matrix profile and matrix profil indices
+        # Update (top-1) right matrix profile and matrix profile indices
         cond = profile_R[0] < profile_R[i]
         profile_R[0] = np.where(cond, profile_R[0], profile_R[i])
         indices_R[0] = np.where(cond, indices_R[0], indices_R[i])