From c054dcc2ebc74ed36cdc42977be49316535074fb Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Thu, 25 Apr 2024 11:59:59 -0700
Subject: [PATCH 1/7] Add clock module

---
 harp/clock.py       | 173 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_clock.py |  47 ++++++++++++
 2 files changed, 220 insertions(+)
 create mode 100644 harp/clock.py
 create mode 100644 tests/test_clock.py

diff --git a/harp/clock.py b/harp/clock.py
new file mode 100644
index 0000000..65be9b0
--- /dev/null
+++ b/harp/clock.py
@@ -0,0 +1,173 @@
+import numpy as np
+import warnings
+
+
+def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=1000.0):
+    """
+    Decodes Harp clock times (in seconds) from a sequence of sample
+    numbers and states.
+
+    The Harp Behavior board can be configured to output a digital
+    signal that encodes the current Harp time as a 32-bit integer,
+    which is emitted once per second.
+
+    The format of the signal is as follows:
+
+      Default value: high
+      Start bit: low -- indicates the transition to the next second
+      8 bits: byte 0, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 1, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 2, with least significant bit first
+      2 bits: high / low transition
+      8 bits: byte 3, with least significant bit first
+      Final bit: reset to high
+
+    Although the baud rate of the internal Harp clock is
+    100 kHz, the Behavior board outputs the clock signal
+    at a lower baud rate (typically 1 kHz), so it can be
+    acquired by data acquisition systems with sample rates
+    as low as 5 kHz.
+
+    Parameters
+    ----------
+    sample_numbers : np.array
+        Integer sample numbers for each clock line transition
+    states : np.array
+        States (1 or 0) for each clock line transition
+    sample_rate : float
+        The sample rate at which the clock signal was acquired
+    baud_rate : float
+        The baud rate of the clock signal
+
+    Returns
+    -------
+    start_samples : np.array
+        Sample numbers at which each second begins
+    harp_times : np.array
+        Harp clock times in seconds
+    """
+
+    min_delta = int(sample_rate / 2)  # 0.5 seconds
+
+    barcode_edges = get_barcode_edges(sample_numbers, min_delta)
+
+    start_samples = [sample_numbers[edges[0]] for edges in barcode_edges]
+
+    harp_times = [
+        convert_barcode(
+            sample_numbers[edges[0] : edges[1]],
+            states[edges[0] : edges[1]],
+            sample_rate=sample_rate,
+            baud_rate=baud_rate,
+        )
+        for edges in barcode_edges
+    ]
+
+    harp_times_corrected = correct_outliers(np.array(harp_times))
+
+    return np.array(start_samples), harp_times_corrected
+
+
+def get_barcode_edges(sample_numbers, min_delta):
+    """
+    Returns the start and end indices of each barcode
+
+    Parameters
+    ----------
+    sample_numbers : np.array
+        Sample numbers of clock line (high and low states)
+    min_delta : int
+        The minimum number of samples between each barcode
+
+    Returns
+    -------
+    edges : list of tuples
+        Contains the start and end indices of each barcode
+    """
+
+    (splits,) = np.where(np.diff(sample_numbers) > min_delta)
+
+    return list(zip(splits[:-1] + 1, splits[1:] + 1))
+
+
+def convert_barcode(sample_numbers, states, sample_rate, baud_rate):
+    """
+    Converts Harp clock barcode to a clock time in seconds
+
+    Parameters
+    ----------
+    sample_numbers : np.array
+        Integer sample numbers for each clock line transition
+    states : np.array
+        states (1 or 0) for each clock line transition
+    sample_rate : float
+        The sample rate at which the clock signal was acquired
+    baud_rate : float
+        The baud rate of the clock signal
+
+    Returns
+    -------
+    harp_time : int
+        Harp time in seconds for the current barcode
+
+    """
+
+    samples_per_bit = int(sample_rate / baud_rate)
+    middle_sample = int(samples_per_bit / 2)
+
+    intervals = np.diff(sample_numbers)
+
+    barcode = np.concatenate(
+        [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)]
+    ).astype("int")
+
+    val = np.concatenate(
+        [
+            np.arange(
+                samples_per_bit + middle_sample + samples_per_bit * 10 * i,
+                samples_per_bit * 10 * i - middle_sample + samples_per_bit * 10,
+                samples_per_bit,
+            )
+            for i in range(4)
+        ]
+    )
+    s = np.flip(barcode[val])
+    harp_time = s.dot(2 ** np.arange(s.size)[::-1])
+
+    return harp_time
+
+
+def correct_outliers(harp_times):
+    """
+    Corrects outliers in the Harp clock times
+
+    Parameters
+    ----------
+    harp_times : np.array
+        Harp clock times in seconds
+
+    Returns
+    -------
+    corrected_harp_times : np.array
+        Corrected Harp clock times in seconds
+    """
+
+    diffs = np.abs(np.diff(harp_times))
+
+    # Find the indices of the outliers
+    outliers = np.where(diffs > 1)[0]
+
+    if len(outliers) == 1:
+        warnings.warn("One outlier found in the decoded Harp clock. Correcting...")
+    elif len(outliers) > 1:
+        warnings.warn(
+            f"{len(outliers)} outliers found in the decoded Harp clock. Correcting..."
+        )
+
+    # Correct the outliers
+    for outlier in outliers:
+        harp_times[outlier + 1] = harp_times[outlier] + 1
+
+    return harp_times
diff --git a/tests/test_clock.py b/tests/test_clock.py
new file mode 100644
index 0000000..34937d1
--- /dev/null
+++ b/tests/test_clock.py
@@ -0,0 +1,47 @@
+import numpy as np
+from pytest import mark
+from harp.clock import decode_harp_clock
+
+# fmt: off
+testinput = [
+    {
+        'sample_numbers': np.array([
+            0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192,
+            29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642,
+            29702, 29732, 30002, 58835, 58865, 58925, 58955, 59015, 59075,
+            59135, 59195, 59255, 59285, 59315, 59405, 59435, 59495, 59525,
+            59555, 59645, 59705, 59735, 60005, 88838, 88928, 89018, 89078,
+            89138, 89198, 89258, 89288, 89318, 89408, 89438, 89498, 89528,
+            89558, 89648, 89708, 89738]),
+        'states': np.array([
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'expected_start_samples': np.array([28832, 58835]),
+        'expected_harp_times': np.array([3806874, 3806875])
+    },
+    {
+        'sample_numbers': np.array([    
+            0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463,
+            29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895,
+            59105, 59135, 59285, 59345, 59375, 59435, 59465, 59555, 59615,
+            59645, 59705, 59735, 59885, 59945, 59975, 60036, 60065, 60156,
+            60216, 60246, 60306, 60336, 60606, 88958]),
+        'states': np.array([
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'expected_start_samples': np.array([28833, 58835]),
+        'expected_harp_times': np.array([2600960, 2600961])
+    }
+]
+# fmt: on
+
+
+@mark.parametrize("test_input", testinput)
+def test_create_reader(test_input):
+    start_samples, harp_times = decode_harp_clock(
+        test_input["sample_numbers"], test_input["states"]
+    )
+
+    assert np.allclose(start_samples, test_input["expected_start_samples"])
+    assert np.allclose(harp_times, test_input["expected_harp_times"])

From e6058d32e33b2de9e78d61e8bbdc54585263075e Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Thu, 25 Apr 2024 16:38:54 -0700
Subject: [PATCH 2/7] Add support for timestamps in seconds

---
 harp/clock.py       | 49 +++++++++++++++++++--------------------------
 tests/test_clock.py | 25 ++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index 65be9b0..e2ddb25 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -2,7 +2,9 @@
 import warnings
 
 
-def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=1000.0):
+def decode_harp_clock(
+    timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0
+):
     """
     Decodes Harp clock times (in seconds) from a sequence of sample
     numbers and states.
@@ -32,34 +34,35 @@ def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=100
 
     Parameters
     ----------
-    sample_numbers : np.array
-        Integer sample numbers for each clock line transition
+    timestamps_or_sample_numbers : np.array
+        Float times in seconds or integer sample numbers
+        for each clock line transition
     states : np.array
         States (1 or 0) for each clock line transition
     sample_rate : float
         The sample rate at which the clock signal was acquired
+        When inputting timestamps in seconds, use a sample rate of 1
     baud_rate : float
         The baud rate of the clock signal
 
     Returns
     -------
-    start_samples : np.array
-        Sample numbers at which each second begins
+    starts : np.array
+        Sample numbers or timestamps at which each second begins
     harp_times : np.array
         Harp clock times in seconds
     """
 
-    min_delta = int(sample_rate / 2)  # 0.5 seconds
+    min_delta = sample_rate / 2  # 0.5 seconds
 
-    barcode_edges = get_barcode_edges(sample_numbers, min_delta)
+    barcode_edges = get_barcode_edges(timestamps_or_sample_numbers, min_delta)
 
-    start_samples = [sample_numbers[edges[0]] for edges in barcode_edges]
+    start_samples = [timestamps_or_sample_numbers[edges[0]] for edges in barcode_edges]
 
     harp_times = [
         convert_barcode(
-            sample_numbers[edges[0] : edges[1]],
+            timestamps_or_sample_numbers[edges[0] : edges[1]] / sample_rate,
             states[edges[0] : edges[1]],
-            sample_rate=sample_rate,
             baud_rate=baud_rate,
         )
         for edges in barcode_edges
@@ -79,7 +82,8 @@ def get_barcode_edges(sample_numbers, min_delta):
     sample_numbers : np.array
         Sample numbers of clock line (high and low states)
     min_delta : int
-        The minimum number of samples between each barcode
+        The minimum length between the end of one
+        barcode and the start of the next
 
     Returns
     -------
@@ -92,18 +96,16 @@ def get_barcode_edges(sample_numbers, min_delta):
     return list(zip(splits[:-1] + 1, splits[1:] + 1))
 
 
-def convert_barcode(sample_numbers, states, sample_rate, baud_rate):
+def convert_barcode(transition_times, states, baud_rate):
     """
     Converts Harp clock barcode to a clock time in seconds
 
     Parameters
     ----------
-    sample_numbers : np.array
-        Integer sample numbers for each clock line transition
+    transition_times : np.array
+        Times (in seconds) each clock line transition
     states : np.array
         states (1 or 0) for each clock line transition
-    sample_rate : float
-        The sample rate at which the clock signal was acquired
     baud_rate : float
         The baud rate of the clock signal
 
@@ -114,25 +116,16 @@ def convert_barcode(sample_numbers, states, sample_rate, baud_rate):
 
     """
 
-    samples_per_bit = int(sample_rate / baud_rate)
-    middle_sample = int(samples_per_bit / 2)
-
-    intervals = np.diff(sample_numbers)
+    intervals = np.round(np.diff(transition_times * baud_rate)).astype("int")
 
     barcode = np.concatenate(
         [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)]
     ).astype("int")
 
     val = np.concatenate(
-        [
-            np.arange(
-                samples_per_bit + middle_sample + samples_per_bit * 10 * i,
-                samples_per_bit * 10 * i - middle_sample + samples_per_bit * 10,
-                samples_per_bit,
-            )
-            for i in range(4)
-        ]
+        (np.arange(1, 9), np.arange(11, 19), np.arange(21, 29), np.arange(31, 39))
     )
+
     s = np.flip(barcode[val])
     harp_time = s.dot(2 ** np.arange(s.size)[::-1])
 
diff --git a/tests/test_clock.py b/tests/test_clock.py
index 34937d1..17b9a72 100644
--- a/tests/test_clock.py
+++ b/tests/test_clock.py
@@ -17,6 +17,7 @@
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'sample_rate': 30000.,
         'expected_start_samples': np.array([28832, 58835]),
         'expected_harp_times': np.array([3806874, 3806875])
     },
@@ -30,8 +31,27 @@
         'states': np.array([
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'sample_rate': 30000.,
         'expected_start_samples': np.array([28833, 58835]),
         'expected_harp_times': np.array([2600960, 2600961])
+    },
+    {
+        'sample_numbers': np.array([
+            0.        , 0.9611    , 0.9701    , 0.9711    , 0.9761    ,
+            0.9781    , 0.9791    , 0.9811    , 0.9821    , 0.9851    ,
+            0.9871    , 0.9881    , 0.9901    , 0.9911    , 1.0001    ,
+            1.96116667, 1.96216667, 1.96316667, 1.97016667, 1.97116667,
+            1.97616667, 1.97816667, 1.97916667, 1.98116667, 1.98216667,
+            1.98516667, 1.98716667, 1.98816667, 1.99016667, 1.99116667,
+            1.99616667, 1.99816667, 1.99916667, 2.0012    , 2.00216667,
+            2.0052    , 2.0072    , 2.0082    , 2.0102    , 2.0112    ,
+            2.0202    , 2.96526667]),
+        'states': np.array([
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
+        'sample_rate' : 1,
+        'expected_start_samples': np.array([0.9611, 1.96116667]),
+        'expected_harp_times': np.array([2600960, 2600961])
     }
 ]
 # fmt: on
@@ -40,7 +60,10 @@
 @mark.parametrize("test_input", testinput)
 def test_create_reader(test_input):
     start_samples, harp_times = decode_harp_clock(
-        test_input["sample_numbers"], test_input["states"]
+        test_input["sample_numbers"],
+        test_input["states"],
+        sample_rate=test_input["sample_rate"],
+        baud_rate=1000,
     )
 
     assert np.allclose(start_samples, test_input["expected_start_samples"])

From 11a1532d84ba59e870d3de22d9468adfc1878900 Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Fri, 26 Apr 2024 08:57:10 -0700
Subject: [PATCH 3/7] Add method to align timestamps to the Harp clock

---
 harp/clock.py       | 54 +++++++++++++++++++++++++++++++++++++++++++++
 tests/test_clock.py | 31 ++++++++++++++++++++------
 2 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index e2ddb25..56321b1 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -2,6 +2,60 @@
 import warnings
 
 
+def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times):
+    """
+    Aligns a set of timestamps or sample numbers to the Harp clock
+
+    `decode_harp_clock` must be run first, in order to find the
+    start of each second in the Harp clock.
+
+    Parameters
+    ----------
+    timestamps_to_align : np.array
+        Timestamps or sample numbers to align to the Harp clock
+    start_times : np.array
+        Start times of each second in the Harp clock
+    harp_times : np.array
+        Harp clock times in seconds
+
+    Returns
+    -------
+    aligned_times : np.array
+        Aligned timestamps or sample numbers
+    """
+
+    if len(start_times) != len(harp_times):
+        raise ValueError(
+            "The number of start times must equal the number of Harp times"
+        )
+
+    N = len(start_times)
+
+    slopes = np.zeros((N + 1,))
+    offsets = np.zeros((N + 1,))
+
+    # compute overall slope and offset
+    A = np.vstack([start_times, np.ones(len(start_times))]).T
+    slopes[0], offsets[0] = np.linalg.lstsq(A, harp_times, rcond=None)[0]
+
+    # compute slope and offset for each segment
+    for i in range(N):
+        x = start_times[i : i + 2]
+        y = harp_times[i : i + 2]
+        A = np.vstack([x, np.ones(len(x))]).T
+        slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0]
+
+    # find the nearest anchor point for each timestamp to align
+    nearest = np.searchsorted(start_times, timestamps_to_align, side="left")
+
+    nearest[nearest == N] = 0
+
+    # interpolate between the anchor points
+    aligned_times = timestamps_to_align * slopes[nearest] + offsets[nearest]
+
+    return aligned_times
+
+
 def decode_harp_clock(
     timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0
 ):
diff --git a/tests/test_clock.py b/tests/test_clock.py
index 17b9a72..dbecdb1 100644
--- a/tests/test_clock.py
+++ b/tests/test_clock.py
@@ -1,10 +1,12 @@
 import numpy as np
 from pytest import mark
-from harp.clock import decode_harp_clock
+from harp.clock import decode_harp_clock, align_timestamps_to_harp_clock
+import warnings
 
 # fmt: off
 testinput = [
     {
+        # contains two valid Harp clock barcodes
         'sample_numbers': np.array([
             0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192,
             29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642,
@@ -22,6 +24,7 @@
         'expected_harp_times': np.array([3806874, 3806875])
     },
     {
+        # contains one valid and one invalid Harp clock barcode
         'sample_numbers': np.array([    
             0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463,
             29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895,
@@ -36,6 +39,7 @@
         'expected_harp_times': np.array([2600960, 2600961])
     },
     {
+        # same as above, but with values in seconds
         'sample_numbers': np.array([
             0.        , 0.9611    , 0.9701    , 0.9711    , 0.9761    ,
             0.9781    , 0.9791    , 0.9811    , 0.9821    , 0.9851    ,
@@ -59,12 +63,25 @@
 
 @mark.parametrize("test_input", testinput)
 def test_create_reader(test_input):
-    start_samples, harp_times = decode_harp_clock(
-        test_input["sample_numbers"],
-        test_input["states"],
-        sample_rate=test_input["sample_rate"],
-        baud_rate=1000,
-    )
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        start_samples, harp_times = decode_harp_clock(
+            test_input["sample_numbers"],
+            test_input["states"],
+            sample_rate=test_input["sample_rate"],
+            baud_rate=1000,
+        )
 
     assert np.allclose(start_samples, test_input["expected_start_samples"])
     assert np.allclose(harp_times, test_input["expected_harp_times"])
+
+    # test alignment for samples 1/2 second after anchors
+    ts = start_samples + test_input["sample_rate"] * 0.5
+    aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times)
+    assert np.allclose(aligned_times, harp_times + 0.5)
+
+    # test alignment for samples 1/2 second before anchors
+    ts = start_samples - test_input["sample_rate"] * 0.5
+    aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times)
+    assert np.allclose(aligned_times, harp_times - 0.5)

From 907dccb1e318d066c95c52dec430ac198229e36d Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Mon, 29 Apr 2024 10:21:04 -0700
Subject: [PATCH 4/7] Only allow timestamps in seconds

---
 harp/clock.py       | 104 +++++++++++++++++++++++++++-----------------
 tests/test_clock.py |  94 +++++++++++++++++++--------------------
 2 files changed, 110 insertions(+), 88 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index 56321b1..54fef48 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -4,7 +4,12 @@
 
 def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times):
     """
-    Aligns a set of timestamps or sample numbers to the Harp clock
+    Aligns a set of timestamps to the Harp clock.
+
+    We assume these timestamps are acquired by a system that is
+    not aligned to the Harp clock. This function finds the nearest
+    anchor point in the Harp clock for each timestamp, and then
+    interpolates between the anchor points to align the timestamps.
 
     `decode_harp_clock` must be run first, in order to find the
     start of each second in the Harp clock.
@@ -12,11 +17,13 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
     Parameters
     ----------
     timestamps_to_align : np.array
-        Timestamps or sample numbers to align to the Harp clock
+        Local timestamps (in seconds) to align to the Harp clock
     start_times : np.array
-        Start times of each second in the Harp clock
+        Local start times of each second in the Harp clock
+        (output by `decode_harp_clock`)
     harp_times : np.array
         Harp clock times in seconds
+        (output by `decode_harp_clock`)
 
     Returns
     -------
@@ -57,11 +64,11 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
 
 
 def decode_harp_clock(
-    timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0
+    timestamps, states, baud_rate=1000.0
 ):
     """
-    Decodes Harp clock times (in seconds) from a sequence of sample
-    numbers and states.
+    Decodes Harp clock times (in seconds) from a sequence of local
+    event timestamps and states.
 
     The Harp Behavior board can be configured to output a digital
     signal that encodes the current Harp time as a 32-bit integer,
@@ -88,53 +95,54 @@ def decode_harp_clock(
 
     Parameters
     ----------
-    timestamps_or_sample_numbers : np.array
-        Float times in seconds or integer sample numbers
-        for each clock line transition
+    timestamps : np.array
+        Float times in seconds for each clock line transition
+        If the acquisition system outputs integer sample numbers
+        for each event, divide by the sample rate to convert to seconds
     states : np.array
         States (1 or 0) for each clock line transition
-    sample_rate : float
-        The sample rate at which the clock signal was acquired
-        When inputting timestamps in seconds, use a sample rate of 1
     baud_rate : float
-        The baud rate of the clock signal
+        The baud rate of the Harp clock signal
 
     Returns
     -------
-    starts : np.array
-        Sample numbers or timestamps at which each second begins
+    start_times : np.array
+        Timestamps at which each second begins
     harp_times : np.array
         Harp clock times in seconds
     """
 
-    min_delta = sample_rate / 2  # 0.5 seconds
+    min_delta = 0.5  # seconds -- Harp clock events must always be
+                     # at least this far apart   
 
-    barcode_edges = get_barcode_edges(timestamps_or_sample_numbers, min_delta)
+    barcode_edges = get_barcode_edges(timestamps, min_delta)
 
-    start_samples = [timestamps_or_sample_numbers[edges[0]] for edges in barcode_edges]
+    start_times = np.array([timestamps[edges[0]] for edges in barcode_edges])
 
-    harp_times = [
+    harp_times = np.array([
         convert_barcode(
-            timestamps_or_sample_numbers[edges[0] : edges[1]] / sample_rate,
+            timestamps[edges[0] : edges[1]],
             states[edges[0] : edges[1]],
             baud_rate=baud_rate,
         )
         for edges in barcode_edges
-    ]
+    ])
 
-    harp_times_corrected = correct_outliers(np.array(harp_times))
+    start_times_corrected, harp_times_corrected = \
+        remove_outliers(start_times, harp_times)
 
-    return np.array(start_samples), harp_times_corrected
+    return start_times_corrected, harp_times_corrected
 
 
-def get_barcode_edges(sample_numbers, min_delta):
+def get_barcode_edges(timestamps, min_delta):
     """
     Returns the start and end indices of each barcode
 
     Parameters
     ----------
-    sample_numbers : np.array
-        Sample numbers of clock line (high and low states)
+    timestamps : np.array
+        Timestamps (ins seconds) of clock line events
+        (high and low states)
     min_delta : int
         The minimum length between the end of one
         barcode and the start of the next
@@ -145,7 +153,7 @@ def get_barcode_edges(sample_numbers, min_delta):
         Contains the start and end indices of each barcode
     """
 
-    (splits,) = np.where(np.diff(sample_numbers) > min_delta)
+    (splits,) = np.where(np.diff(timestamps) > min_delta)
 
     return list(zip(splits[:-1] + 1, splits[1:] + 1))
 
@@ -186,35 +194,49 @@ def convert_barcode(transition_times, states, baud_rate):
     return harp_time
 
 
-def correct_outliers(harp_times):
+def remove_outliers(start_samples, harp_times):
     """
-    Corrects outliers in the Harp clock times
+    Removes outliers from the Harp clock times
+
+    These outliers are caused by problems decoding
+    the Harp clock signal, leading to consecutive times that
+    do not increase by exactly 1. These will be removed from
+    the array of Harp times, so they will not be used 
+    as anchor points during subsequent clock alignment.
+
+    If the times jump to a new value and continue to
+    increase by 1, either due to a reset of the Harp clock
+    or a gap in the data, these will be ignored.
 
     Parameters
     ----------
+    start_samples : np.array
+        Harp clock start times in seconds
     harp_times : np.array
         Harp clock times in seconds
 
     Returns
     -------
+    corrected_start_samples : np.array
+        Corrected Harp clock times in seconds
     corrected_harp_times : np.array
         Corrected Harp clock times in seconds
     """
 
-    diffs = np.abs(np.diff(harp_times))
+    original_indices = np.arange(len(harp_times))
 
-    # Find the indices of the outliers
-    outliers = np.where(diffs > 1)[0]
+    new_indices = np.concatenate(
+        [sub_array for sub_array
+         in np.split(original_indices,
+          np.where(np.diff(harp_times) != 1)[0]+1)
+         if len(sub_array) > 1])
+    
+    num_outliers = len(original_indices) - len(new_indices)
 
-    if len(outliers) == 1:
-        warnings.warn("One outlier found in the decoded Harp clock. Correcting...")
-    elif len(outliers) > 1:
+    if num_outliers > 0:
         warnings.warn(
-            f"{len(outliers)} outliers found in the decoded Harp clock. Correcting..."
+            f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " +
+            "found in the decoded Harp clock. Removing..."
         )
 
-    # Correct the outliers
-    for outlier in outliers:
-        harp_times[outlier + 1] = harp_times[outlier] + 1
-
-    return harp_times
+    return start_samples[new_indices], harp_times[new_indices]
diff --git a/tests/test_clock.py b/tests/test_clock.py
index dbecdb1..c231b16 100644
--- a/tests/test_clock.py
+++ b/tests/test_clock.py
@@ -7,55 +7,56 @@
 testinput = [
     {
         # contains two valid Harp clock barcodes
-        'sample_numbers': np.array([
-            0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192,
-            29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642,
-            29702, 29732, 30002, 58835, 58865, 58925, 58955, 59015, 59075,
-            59135, 59195, 59255, 59285, 59315, 59405, 59435, 59495, 59525,
-            59555, 59645, 59705, 59735, 60005, 88838, 88928, 89018, 89078,
-            89138, 89198, 89258, 89288, 89318, 89408, 89438, 89498, 89528,
-            89558, 89648, 89708, 89738]),
+        'timestamps': np.array([
+            0.        , 0.96106667, 0.96306667, 0.96406667, 0.96506667,
+            0.96706667, 0.96906667, 0.97106667, 0.97306667, 0.97506667,
+            0.97606667, 0.97706667, 0.98006667, 0.98106667, 0.98306667,
+            0.98406667, 0.98506667, 0.98806667, 0.99006667, 0.99106667,
+            1.00006667, 1.96116667, 1.96216667, 1.96416667, 1.96516667,
+            1.96716667, 1.96916667, 1.97116667, 1.97316667, 1.97516667,
+            1.97616667, 1.97716667, 1.98016667, 1.98116667, 1.98316667,
+            1.98416667, 1.98516667, 1.98816667, 1.99016667, 1.99116667,
+            2.00016667, 2.96126667, 2.96426667, 2.96726667, 2.96926667,
+            2.97126667, 2.97326667, 2.97526667, 2.97626667, 2.97726667,
+            2.98026667, 2.98126667, 2.98326667, 2.98426667, 2.98526667,
+            2.98826667, 2.99026667, 2.99126667]),
         'states': np.array([
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
-        'sample_rate': 30000.,
-        'expected_start_samples': np.array([28832, 58835]),
+        'expected_start_times': np.array([0.96106667, 1.96116667]),
         'expected_harp_times': np.array([3806874, 3806875])
     },
     {
-        # contains one valid and one invalid Harp clock barcode
-        'sample_numbers': np.array([    
-            0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463,
-            29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895,
-            59105, 59135, 59285, 59345, 59375, 59435, 59465, 59555, 59615,
-            59645, 59705, 59735, 59885, 59945, 59975, 60036, 60065, 60156,
-            60216, 60246, 60306, 60336, 60606, 88958]),
+        # contains 4 valid Harp clock barcodes and one invalid barcode
+        'timestamps': np.array([
+            0.14036667, 1.10146667, 1.10246667, 1.10346667, 1.10446667,
+            1.11146667, 1.11246667, 1.11646667, 1.11746667, 1.11846667,
+            1.11946667, 1.12146667, 1.12246667, 1.12546667, 1.12746667,
+            1.12846667, 1.13046667, 1.13146667, 1.14046667, 2.10156667,
+            2.10356667, 2.11156667, 2.11256667, 2.11656667, 2.11756667,
+            2.11856667, 2.11956667, 2.12156667, 2.12256667, 2.12556667,
+            2.12756667, 2.12856667, 2.13056667, 2.13156667, 2.14056667,
+            3.10163333, 3.10263333, 3.11163333, 3.11263333, 3.11663333,
+            3.11763333, 3.11863333, 3.11963333, 3.12163333, 3.12263333,
+            3.12563333, 3.12763333, 3.12863333, 3.13063333, 3.13163333,
+            3.14063333, 4.10173333, 4.11073333, 4.11173333, 4.11673333,
+            4.11873333, 4.11973333, 4.12173333, 4.12273333, 4.12573333,
+            4.12773333, 4.12873333, 4.13073333, 4.13173333, 4.14073333,
+            5.1018    , 5.1028    , 5.1038    , 5.1108    , 5.1118    ,
+            5.1168    , 5.1188    , 5.1198    , 5.1218    , 5.1228    ,
+            5.1258    , 5.1278    , 5.1288    , 5.1308    , 5.1318    ,
+            5.1368    , 5.1388    , 5.1398    , 5.14183333, 5.1428    ,
+            5.14583333, 5.14783333, 5.14883333, 5.15083333, 5.15183333,
+            5.16083333, 6.1059    ]),
         'states': np.array([
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
-            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
-        'sample_rate': 30000.,
-        'expected_start_samples': np.array([28833, 58835]),
-        'expected_harp_times': np.array([2600960, 2600961])
-    },
-    {
-        # same as above, but with values in seconds
-        'sample_numbers': np.array([
-            0.        , 0.9611    , 0.9701    , 0.9711    , 0.9761    ,
-            0.9781    , 0.9791    , 0.9811    , 0.9821    , 0.9851    ,
-            0.9871    , 0.9881    , 0.9901    , 0.9911    , 1.0001    ,
-            1.96116667, 1.96216667, 1.96316667, 1.97016667, 1.97116667,
-            1.97616667, 1.97816667, 1.97916667, 1.98116667, 1.98216667,
-            1.98516667, 1.98716667, 1.98816667, 1.99016667, 1.99116667,
-            1.99616667, 1.99816667, 1.99916667, 2.0012    , 2.00216667,
-            2.0052    , 2.0072    , 2.0082    , 2.0102    , 2.0112    ,
-            2.0202    , 2.96526667]),
-        'states': np.array([
             1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
-            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]),
-        'sample_rate' : 1,
-        'expected_start_samples': np.array([0.9611, 1.96116667]),
-        'expected_harp_times': np.array([2600960, 2600961])
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+            1, 0, 1, 0]),
+        'expected_start_times': np.array([1.10146667, 2.10156667, 3.10163333, 4.10173333]),
+        'expected_harp_times': np.array([2600957, 2600958, 2600959, 2600960])
     }
 ]
 # fmt: on
@@ -66,22 +67,21 @@ def test_create_reader(test_input):
 
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
-        start_samples, harp_times = decode_harp_clock(
-            test_input["sample_numbers"],
+        start_times, harp_times = decode_harp_clock(
+            test_input["timestamps"],
             test_input["states"],
-            sample_rate=test_input["sample_rate"],
             baud_rate=1000,
         )
 
-    assert np.allclose(start_samples, test_input["expected_start_samples"])
+    assert np.allclose(start_times, test_input["expected_start_times"])
     assert np.allclose(harp_times, test_input["expected_harp_times"])
 
     # test alignment for samples 1/2 second after anchors
-    ts = start_samples + test_input["sample_rate"] * 0.5
-    aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times)
+    ts = start_times + 0.5
+    aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times)
     assert np.allclose(aligned_times, harp_times + 0.5)
 
     # test alignment for samples 1/2 second before anchors
-    ts = start_samples - test_input["sample_rate"] * 0.5
-    aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times)
+    ts = start_times - 0.5
+    aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times)
     assert np.allclose(aligned_times, harp_times - 0.5)

From 3644013e9108e1c4db8d9ef44347a669477275e7 Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Mon, 29 Apr 2024 10:22:34 -0700
Subject: [PATCH 5/7] Sample numbers --> timestamps

---
 harp/clock.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index 54fef48..1b0ede5 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -28,7 +28,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
     Returns
     -------
     aligned_times : np.array
-        Aligned timestamps or sample numbers
+        Aligned timestamps
     """
 
     if len(start_times) != len(harp_times):
@@ -194,7 +194,7 @@ def convert_barcode(transition_times, states, baud_rate):
     return harp_time
 
 
-def remove_outliers(start_samples, harp_times):
+def remove_outliers(start_times, harp_times):
     """
     Removes outliers from the Harp clock times
 
@@ -210,14 +210,14 @@ def remove_outliers(start_samples, harp_times):
 
     Parameters
     ----------
-    start_samples : np.array
+    start_times : np.array
         Harp clock start times in seconds
     harp_times : np.array
         Harp clock times in seconds
 
     Returns
     -------
-    corrected_start_samples : np.array
+    corrected_start_times : np.array
         Corrected Harp clock times in seconds
     corrected_harp_times : np.array
         Corrected Harp clock times in seconds
@@ -239,4 +239,4 @@ def remove_outliers(start_samples, harp_times):
             "found in the decoded Harp clock. Removing..."
         )
 
-    return start_samples[new_indices], harp_times[new_indices]
+    return start_times[new_indices], harp_times[new_indices]

From 0b0be8a86cbfcbdd1d8b881e8461977359fbb673 Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Mon, 29 Apr 2024 10:22:57 -0700
Subject: [PATCH 6/7] Apply formatting

---
 harp/clock.py | 49 +++++++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index 1b0ede5..201984b 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -63,9 +63,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
     return aligned_times
 
 
-def decode_harp_clock(
-    timestamps, states, baud_rate=1000.0
-):
+def decode_harp_clock(timestamps, states, baud_rate=1000.0):
     """
     Decodes Harp clock times (in seconds) from a sequence of local
     event timestamps and states.
@@ -113,23 +111,26 @@ def decode_harp_clock(
     """
 
     min_delta = 0.5  # seconds -- Harp clock events must always be
-                     # at least this far apart   
+    # at least this far apart
 
     barcode_edges = get_barcode_edges(timestamps, min_delta)
 
     start_times = np.array([timestamps[edges[0]] for edges in barcode_edges])
 
-    harp_times = np.array([
-        convert_barcode(
-            timestamps[edges[0] : edges[1]],
-            states[edges[0] : edges[1]],
-            baud_rate=baud_rate,
-        )
-        for edges in barcode_edges
-    ])
+    harp_times = np.array(
+        [
+            convert_barcode(
+                timestamps[edges[0] : edges[1]],
+                states[edges[0] : edges[1]],
+                baud_rate=baud_rate,
+            )
+            for edges in barcode_edges
+        ]
+    )
 
-    start_times_corrected, harp_times_corrected = \
-        remove_outliers(start_times, harp_times)
+    start_times_corrected, harp_times_corrected = remove_outliers(
+        start_times, harp_times
+    )
 
     return start_times_corrected, harp_times_corrected
 
@@ -201,7 +202,7 @@ def remove_outliers(start_times, harp_times):
     These outliers are caused by problems decoding
     the Harp clock signal, leading to consecutive times that
     do not increase by exactly 1. These will be removed from
-    the array of Harp times, so they will not be used 
+    the array of Harp times, so they will not be used
     as anchor points during subsequent clock alignment.
 
     If the times jump to a new value and continue to
@@ -226,17 +227,21 @@ def remove_outliers(start_times, harp_times):
     original_indices = np.arange(len(harp_times))
 
     new_indices = np.concatenate(
-        [sub_array for sub_array
-         in np.split(original_indices,
-          np.where(np.diff(harp_times) != 1)[0]+1)
-         if len(sub_array) > 1])
-    
+        [
+            sub_array
+            for sub_array in np.split(
+                original_indices, np.where(np.diff(harp_times) != 1)[0] + 1
+            )
+            if len(sub_array) > 1
+        ]
+    )
+
     num_outliers = len(original_indices) - len(new_indices)
 
     if num_outliers > 0:
         warnings.warn(
-            f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " +
-            "found in the decoded Harp clock. Removing..."
+            f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} "
+            + "found in the decoded Harp clock. Removing..."
         )
 
     return start_times[new_indices], harp_times[new_indices]

From 640d08559c1845a2dca92b7cb2f7e38927149978 Mon Sep 17 00:00:00 2001
From: Josh Siegle <joshs@alleninstitute.org>
Date: Wed, 1 May 2024 11:19:01 -0700
Subject: [PATCH 7/7] Make function name more general

---
 harp/clock.py       | 15 ++++++++-------
 tests/test_clock.py |  6 +++---
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/harp/clock.py b/harp/clock.py
index 201984b..f22431f 100644
--- a/harp/clock.py
+++ b/harp/clock.py
@@ -2,9 +2,9 @@
 import warnings
 
 
-def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times):
+def align_timestamps_to_anchor_points(timestamps_to_align, start_times, anchor_times):
     """
-    Aligns a set of timestamps to the Harp clock.
+    Aligns a set of timestamps to anchor points (e.g., Harp clock times)
 
     We assume these timestamps are acquired by a system that is
     not aligned to the Harp clock. This function finds the nearest
@@ -21,8 +21,9 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
     start_times : np.array
         Local start times of each second in the Harp clock
         (output by `decode_harp_clock`)
-    harp_times : np.array
-        Harp clock times in seconds
+    anchor_times : np.array
+        Global clock times in seconds (typically the Harp clock times,
+        but can be any set of anchor points to align to)
         (output by `decode_harp_clock`)
 
     Returns
@@ -31,7 +32,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
         Aligned timestamps
     """
 
-    if len(start_times) != len(harp_times):
+    if len(start_times) != len(anchor_times):
         raise ValueError(
             "The number of start times must equal the number of Harp times"
         )
@@ -43,12 +44,12 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times)
 
     # compute overall slope and offset
     A = np.vstack([start_times, np.ones(len(start_times))]).T
-    slopes[0], offsets[0] = np.linalg.lstsq(A, harp_times, rcond=None)[0]
+    slopes[0], offsets[0] = np.linalg.lstsq(A, anchor_times, rcond=None)[0]
 
     # compute slope and offset for each segment
     for i in range(N):
         x = start_times[i : i + 2]
-        y = harp_times[i : i + 2]
+        y = anchor_times[i : i + 2]
         A = np.vstack([x, np.ones(len(x))]).T
         slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0]
 
diff --git a/tests/test_clock.py b/tests/test_clock.py
index c231b16..5b7dd7f 100644
--- a/tests/test_clock.py
+++ b/tests/test_clock.py
@@ -1,6 +1,6 @@
 import numpy as np
 from pytest import mark
-from harp.clock import decode_harp_clock, align_timestamps_to_harp_clock
+from harp.clock import decode_harp_clock, align_timestamps_to_anchor_points
 import warnings
 
 # fmt: off
@@ -78,10 +78,10 @@ def test_create_reader(test_input):
 
     # test alignment for samples 1/2 second after anchors
     ts = start_times + 0.5
-    aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times)
+    aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times)
     assert np.allclose(aligned_times, harp_times + 0.5)
 
     # test alignment for samples 1/2 second before anchors
     ts = start_times - 0.5
-    aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times)
+    aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times)
     assert np.allclose(aligned_times, harp_times - 0.5)