From c054dcc2ebc74ed36cdc42977be49316535074fb Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Thu, 25 Apr 2024 11:59:59 -0700 Subject: [PATCH 1/7] Add clock module --- harp/clock.py | 173 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_clock.py | 47 ++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 harp/clock.py create mode 100644 tests/test_clock.py diff --git a/harp/clock.py b/harp/clock.py new file mode 100644 index 0000000..65be9b0 --- /dev/null +++ b/harp/clock.py @@ -0,0 +1,173 @@ +import numpy as np +import warnings + + +def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=1000.0): + """ + Decodes Harp clock times (in seconds) from a sequence of sample + numbers and states. + + The Harp Behavior board can be configured to output a digital + signal that encodes the current Harp time as a 32-bit integer, + which is emitted once per second. + + The format of the signal is as follows: + + Default value: high + Start bit: low -- indicates the transition to the next second + 8 bits: byte 0, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 1, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 2, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 3, with least significant bit first + Final bit: reset to high + + Although the baud rate of the internal Harp clock is + 100 kHz, the Behavior board outputs the clock signal + at a lower baud rate (typically 1 kHz), so it can be + acquired by data acquisition systems with sample rates + as low as 5 kHz. + + Parameters + ---------- + sample_numbers : np.array + Integer sample numbers for each clock line transition + states : np.array + States (1 or 0) for each clock line transition + sample_rate : float + The sample rate at which the clock signal was acquired + baud_rate : float + The baud rate of the clock signal + + Returns + ------- + start_samples : np.array + Sample numbers at which each second begins + harp_times : np.array + Harp clock times in seconds + """ + + min_delta = int(sample_rate / 2) # 0.5 seconds + + barcode_edges = get_barcode_edges(sample_numbers, min_delta) + + start_samples = [sample_numbers[edges[0]] for edges in barcode_edges] + + harp_times = [ + convert_barcode( + sample_numbers[edges[0] : edges[1]], + states[edges[0] : edges[1]], + sample_rate=sample_rate, + baud_rate=baud_rate, + ) + for edges in barcode_edges + ] + + harp_times_corrected = correct_outliers(np.array(harp_times)) + + return np.array(start_samples), harp_times_corrected + + +def get_barcode_edges(sample_numbers, min_delta): + """ + Returns the start and end indices of each barcode + + Parameters + ---------- + sample_numbers : np.array + Sample numbers of clock line (high and low states) + min_delta : int + The minimum number of samples between each barcode + + Returns + ------- + edges : list of tuples + Contains the start and end indices of each barcode + """ + + (splits,) = np.where(np.diff(sample_numbers) > min_delta) + + return list(zip(splits[:-1] + 1, splits[1:] + 1)) + + +def convert_barcode(sample_numbers, states, sample_rate, baud_rate): + """ + Converts Harp clock barcode to a clock time in seconds + + Parameters + ---------- + sample_numbers : np.array + Integer sample numbers for each clock line transition + states : np.array + states (1 or 0) for each clock line transition + sample_rate : float + The sample rate at which the clock signal was acquired + baud_rate : float + The baud rate of the clock signal + + Returns + ------- + harp_time : int + Harp time in seconds for the current barcode + + """ + + samples_per_bit = int(sample_rate / baud_rate) + middle_sample = int(samples_per_bit / 2) + + intervals = np.diff(sample_numbers) + + barcode = np.concatenate( + [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)] + ).astype("int") + + val = np.concatenate( + [ + np.arange( + samples_per_bit + middle_sample + samples_per_bit * 10 * i, + samples_per_bit * 10 * i - middle_sample + samples_per_bit * 10, + samples_per_bit, + ) + for i in range(4) + ] + ) + s = np.flip(barcode[val]) + harp_time = s.dot(2 ** np.arange(s.size)[::-1]) + + return harp_time + + +def correct_outliers(harp_times): + """ + Corrects outliers in the Harp clock times + + Parameters + ---------- + harp_times : np.array + Harp clock times in seconds + + Returns + ------- + corrected_harp_times : np.array + Corrected Harp clock times in seconds + """ + + diffs = np.abs(np.diff(harp_times)) + + # Find the indices of the outliers + outliers = np.where(diffs > 1)[0] + + if len(outliers) == 1: + warnings.warn("One outlier found in the decoded Harp clock. Correcting...") + elif len(outliers) > 1: + warnings.warn( + f"{len(outliers)} outliers found in the decoded Harp clock. Correcting..." + ) + + # Correct the outliers + for outlier in outliers: + harp_times[outlier + 1] = harp_times[outlier] + 1 + + return harp_times diff --git a/tests/test_clock.py b/tests/test_clock.py new file mode 100644 index 0000000..34937d1 --- /dev/null +++ b/tests/test_clock.py @@ -0,0 +1,47 @@ +import numpy as np +from pytest import mark +from harp.clock import decode_harp_clock + +# fmt: off +testinput = [ + { + 'sample_numbers': np.array([ + 0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192, + 29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642, + 29702, 29732, 30002, 58835, 58865, 58925, 58955, 59015, 59075, + 59135, 59195, 59255, 59285, 59315, 59405, 59435, 59495, 59525, + 59555, 59645, 59705, 59735, 60005, 88838, 88928, 89018, 89078, + 89138, 89198, 89258, 89288, 89318, 89408, 89438, 89498, 89528, + 89558, 89648, 89708, 89738]), + 'states': np.array([ + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'expected_start_samples': np.array([28832, 58835]), + 'expected_harp_times': np.array([3806874, 3806875]) + }, + { + 'sample_numbers': np.array([ + 0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463, + 29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895, + 59105, 59135, 59285, 59345, 59375, 59435, 59465, 59555, 59615, + 59645, 59705, 59735, 59885, 59945, 59975, 60036, 60065, 60156, + 60216, 60246, 60306, 60336, 60606, 88958]), + 'states': np.array([ + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'expected_start_samples': np.array([28833, 58835]), + 'expected_harp_times': np.array([2600960, 2600961]) + } +] +# fmt: on + + +@mark.parametrize("test_input", testinput) +def test_create_reader(test_input): + start_samples, harp_times = decode_harp_clock( + test_input["sample_numbers"], test_input["states"] + ) + + assert np.allclose(start_samples, test_input["expected_start_samples"]) + assert np.allclose(harp_times, test_input["expected_harp_times"]) From e6058d32e33b2de9e78d61e8bbdc54585263075e Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Thu, 25 Apr 2024 16:38:54 -0700 Subject: [PATCH 2/7] Add support for timestamps in seconds --- harp/clock.py | 49 +++++++++++++++++++-------------------------- tests/test_clock.py | 25 ++++++++++++++++++++++- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index 65be9b0..e2ddb25 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -2,7 +2,9 @@ import warnings -def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=1000.0): +def decode_harp_clock( + timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0 +): """ Decodes Harp clock times (in seconds) from a sequence of sample numbers and states. @@ -32,34 +34,35 @@ def decode_harp_clock(sample_numbers, states, sample_rate=30000.0, baud_rate=100 Parameters ---------- - sample_numbers : np.array - Integer sample numbers for each clock line transition + timestamps_or_sample_numbers : np.array + Float times in seconds or integer sample numbers + for each clock line transition states : np.array States (1 or 0) for each clock line transition sample_rate : float The sample rate at which the clock signal was acquired + When inputting timestamps in seconds, use a sample rate of 1 baud_rate : float The baud rate of the clock signal Returns ------- - start_samples : np.array - Sample numbers at which each second begins + starts : np.array + Sample numbers or timestamps at which each second begins harp_times : np.array Harp clock times in seconds """ - min_delta = int(sample_rate / 2) # 0.5 seconds + min_delta = sample_rate / 2 # 0.5 seconds - barcode_edges = get_barcode_edges(sample_numbers, min_delta) + barcode_edges = get_barcode_edges(timestamps_or_sample_numbers, min_delta) - start_samples = [sample_numbers[edges[0]] for edges in barcode_edges] + start_samples = [timestamps_or_sample_numbers[edges[0]] for edges in barcode_edges] harp_times = [ convert_barcode( - sample_numbers[edges[0] : edges[1]], + timestamps_or_sample_numbers[edges[0] : edges[1]] / sample_rate, states[edges[0] : edges[1]], - sample_rate=sample_rate, baud_rate=baud_rate, ) for edges in barcode_edges @@ -79,7 +82,8 @@ def get_barcode_edges(sample_numbers, min_delta): sample_numbers : np.array Sample numbers of clock line (high and low states) min_delta : int - The minimum number of samples between each barcode + The minimum length between the end of one + barcode and the start of the next Returns ------- @@ -92,18 +96,16 @@ def get_barcode_edges(sample_numbers, min_delta): return list(zip(splits[:-1] + 1, splits[1:] + 1)) -def convert_barcode(sample_numbers, states, sample_rate, baud_rate): +def convert_barcode(transition_times, states, baud_rate): """ Converts Harp clock barcode to a clock time in seconds Parameters ---------- - sample_numbers : np.array - Integer sample numbers for each clock line transition + transition_times : np.array + Times (in seconds) each clock line transition states : np.array states (1 or 0) for each clock line transition - sample_rate : float - The sample rate at which the clock signal was acquired baud_rate : float The baud rate of the clock signal @@ -114,25 +116,16 @@ def convert_barcode(sample_numbers, states, sample_rate, baud_rate): """ - samples_per_bit = int(sample_rate / baud_rate) - middle_sample = int(samples_per_bit / 2) - - intervals = np.diff(sample_numbers) + intervals = np.round(np.diff(transition_times * baud_rate)).astype("int") barcode = np.concatenate( [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)] ).astype("int") val = np.concatenate( - [ - np.arange( - samples_per_bit + middle_sample + samples_per_bit * 10 * i, - samples_per_bit * 10 * i - middle_sample + samples_per_bit * 10, - samples_per_bit, - ) - for i in range(4) - ] + (np.arange(1, 9), np.arange(11, 19), np.arange(21, 29), np.arange(31, 39)) ) + s = np.flip(barcode[val]) harp_time = s.dot(2 ** np.arange(s.size)[::-1]) diff --git a/tests/test_clock.py b/tests/test_clock.py index 34937d1..17b9a72 100644 --- a/tests/test_clock.py +++ b/tests/test_clock.py @@ -17,6 +17,7 @@ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'sample_rate': 30000., 'expected_start_samples': np.array([28832, 58835]), 'expected_harp_times': np.array([3806874, 3806875]) }, @@ -30,8 +31,27 @@ 'states': np.array([ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'sample_rate': 30000., 'expected_start_samples': np.array([28833, 58835]), 'expected_harp_times': np.array([2600960, 2600961]) + }, + { + 'sample_numbers': np.array([ + 0. , 0.9611 , 0.9701 , 0.9711 , 0.9761 , + 0.9781 , 0.9791 , 0.9811 , 0.9821 , 0.9851 , + 0.9871 , 0.9881 , 0.9901 , 0.9911 , 1.0001 , + 1.96116667, 1.96216667, 1.96316667, 1.97016667, 1.97116667, + 1.97616667, 1.97816667, 1.97916667, 1.98116667, 1.98216667, + 1.98516667, 1.98716667, 1.98816667, 1.99016667, 1.99116667, + 1.99616667, 1.99816667, 1.99916667, 2.0012 , 2.00216667, + 2.0052 , 2.0072 , 2.0082 , 2.0102 , 2.0112 , + 2.0202 , 2.96526667]), + 'states': np.array([ + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'sample_rate' : 1, + 'expected_start_samples': np.array([0.9611, 1.96116667]), + 'expected_harp_times': np.array([2600960, 2600961]) } ] # fmt: on @@ -40,7 +60,10 @@ @mark.parametrize("test_input", testinput) def test_create_reader(test_input): start_samples, harp_times = decode_harp_clock( - test_input["sample_numbers"], test_input["states"] + test_input["sample_numbers"], + test_input["states"], + sample_rate=test_input["sample_rate"], + baud_rate=1000, ) assert np.allclose(start_samples, test_input["expected_start_samples"]) From 11a1532d84ba59e870d3de22d9468adfc1878900 Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Fri, 26 Apr 2024 08:57:10 -0700 Subject: [PATCH 3/7] Add method to align timestamps to the Harp clock --- harp/clock.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_clock.py | 31 ++++++++++++++++++++------ 2 files changed, 78 insertions(+), 7 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index e2ddb25..56321b1 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -2,6 +2,60 @@ import warnings +def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times): + """ + Aligns a set of timestamps or sample numbers to the Harp clock + + `decode_harp_clock` must be run first, in order to find the + start of each second in the Harp clock. + + Parameters + ---------- + timestamps_to_align : np.array + Timestamps or sample numbers to align to the Harp clock + start_times : np.array + Start times of each second in the Harp clock + harp_times : np.array + Harp clock times in seconds + + Returns + ------- + aligned_times : np.array + Aligned timestamps or sample numbers + """ + + if len(start_times) != len(harp_times): + raise ValueError( + "The number of start times must equal the number of Harp times" + ) + + N = len(start_times) + + slopes = np.zeros((N + 1,)) + offsets = np.zeros((N + 1,)) + + # compute overall slope and offset + A = np.vstack([start_times, np.ones(len(start_times))]).T + slopes[0], offsets[0] = np.linalg.lstsq(A, harp_times, rcond=None)[0] + + # compute slope and offset for each segment + for i in range(N): + x = start_times[i : i + 2] + y = harp_times[i : i + 2] + A = np.vstack([x, np.ones(len(x))]).T + slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0] + + # find the nearest anchor point for each timestamp to align + nearest = np.searchsorted(start_times, timestamps_to_align, side="left") + + nearest[nearest == N] = 0 + + # interpolate between the anchor points + aligned_times = timestamps_to_align * slopes[nearest] + offsets[nearest] + + return aligned_times + + def decode_harp_clock( timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0 ): diff --git a/tests/test_clock.py b/tests/test_clock.py index 17b9a72..dbecdb1 100644 --- a/tests/test_clock.py +++ b/tests/test_clock.py @@ -1,10 +1,12 @@ import numpy as np from pytest import mark -from harp.clock import decode_harp_clock +from harp.clock import decode_harp_clock, align_timestamps_to_harp_clock +import warnings # fmt: off testinput = [ { + # contains two valid Harp clock barcodes 'sample_numbers': np.array([ 0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192, 29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642, @@ -22,6 +24,7 @@ 'expected_harp_times': np.array([3806874, 3806875]) }, { + # contains one valid and one invalid Harp clock barcode 'sample_numbers': np.array([ 0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463, 29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895, @@ -36,6 +39,7 @@ 'expected_harp_times': np.array([2600960, 2600961]) }, { + # same as above, but with values in seconds 'sample_numbers': np.array([ 0. , 0.9611 , 0.9701 , 0.9711 , 0.9761 , 0.9781 , 0.9791 , 0.9811 , 0.9821 , 0.9851 , @@ -59,12 +63,25 @@ @mark.parametrize("test_input", testinput) def test_create_reader(test_input): - start_samples, harp_times = decode_harp_clock( - test_input["sample_numbers"], - test_input["states"], - sample_rate=test_input["sample_rate"], - baud_rate=1000, - ) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + start_samples, harp_times = decode_harp_clock( + test_input["sample_numbers"], + test_input["states"], + sample_rate=test_input["sample_rate"], + baud_rate=1000, + ) assert np.allclose(start_samples, test_input["expected_start_samples"]) assert np.allclose(harp_times, test_input["expected_harp_times"]) + + # test alignment for samples 1/2 second after anchors + ts = start_samples + test_input["sample_rate"] * 0.5 + aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times) + assert np.allclose(aligned_times, harp_times + 0.5) + + # test alignment for samples 1/2 second before anchors + ts = start_samples - test_input["sample_rate"] * 0.5 + aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times) + assert np.allclose(aligned_times, harp_times - 0.5) From 907dccb1e318d066c95c52dec430ac198229e36d Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Mon, 29 Apr 2024 10:21:04 -0700 Subject: [PATCH 4/7] Only allow timestamps in seconds --- harp/clock.py | 104 +++++++++++++++++++++++++++----------------- tests/test_clock.py | 94 +++++++++++++++++++-------------------- 2 files changed, 110 insertions(+), 88 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index 56321b1..54fef48 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -4,7 +4,12 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times): """ - Aligns a set of timestamps or sample numbers to the Harp clock + Aligns a set of timestamps to the Harp clock. + + We assume these timestamps are acquired by a system that is + not aligned to the Harp clock. This function finds the nearest + anchor point in the Harp clock for each timestamp, and then + interpolates between the anchor points to align the timestamps. `decode_harp_clock` must be run first, in order to find the start of each second in the Harp clock. @@ -12,11 +17,13 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) Parameters ---------- timestamps_to_align : np.array - Timestamps or sample numbers to align to the Harp clock + Local timestamps (in seconds) to align to the Harp clock start_times : np.array - Start times of each second in the Harp clock + Local start times of each second in the Harp clock + (output by `decode_harp_clock`) harp_times : np.array Harp clock times in seconds + (output by `decode_harp_clock`) Returns ------- @@ -57,11 +64,11 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) def decode_harp_clock( - timestamps_or_sample_numbers, states, sample_rate=1, baud_rate=1000.0 + timestamps, states, baud_rate=1000.0 ): """ - Decodes Harp clock times (in seconds) from a sequence of sample - numbers and states. + Decodes Harp clock times (in seconds) from a sequence of local + event timestamps and states. The Harp Behavior board can be configured to output a digital signal that encodes the current Harp time as a 32-bit integer, @@ -88,53 +95,54 @@ def decode_harp_clock( Parameters ---------- - timestamps_or_sample_numbers : np.array - Float times in seconds or integer sample numbers - for each clock line transition + timestamps : np.array + Float times in seconds for each clock line transition + If the acquisition system outputs integer sample numbers + for each event, divide by the sample rate to convert to seconds states : np.array States (1 or 0) for each clock line transition - sample_rate : float - The sample rate at which the clock signal was acquired - When inputting timestamps in seconds, use a sample rate of 1 baud_rate : float - The baud rate of the clock signal + The baud rate of the Harp clock signal Returns ------- - starts : np.array - Sample numbers or timestamps at which each second begins + start_times : np.array + Timestamps at which each second begins harp_times : np.array Harp clock times in seconds """ - min_delta = sample_rate / 2 # 0.5 seconds + min_delta = 0.5 # seconds -- Harp clock events must always be + # at least this far apart - barcode_edges = get_barcode_edges(timestamps_or_sample_numbers, min_delta) + barcode_edges = get_barcode_edges(timestamps, min_delta) - start_samples = [timestamps_or_sample_numbers[edges[0]] for edges in barcode_edges] + start_times = np.array([timestamps[edges[0]] for edges in barcode_edges]) - harp_times = [ + harp_times = np.array([ convert_barcode( - timestamps_or_sample_numbers[edges[0] : edges[1]] / sample_rate, + timestamps[edges[0] : edges[1]], states[edges[0] : edges[1]], baud_rate=baud_rate, ) for edges in barcode_edges - ] + ]) - harp_times_corrected = correct_outliers(np.array(harp_times)) + start_times_corrected, harp_times_corrected = \ + remove_outliers(start_times, harp_times) - return np.array(start_samples), harp_times_corrected + return start_times_corrected, harp_times_corrected -def get_barcode_edges(sample_numbers, min_delta): +def get_barcode_edges(timestamps, min_delta): """ Returns the start and end indices of each barcode Parameters ---------- - sample_numbers : np.array - Sample numbers of clock line (high and low states) + timestamps : np.array + Timestamps (ins seconds) of clock line events + (high and low states) min_delta : int The minimum length between the end of one barcode and the start of the next @@ -145,7 +153,7 @@ def get_barcode_edges(sample_numbers, min_delta): Contains the start and end indices of each barcode """ - (splits,) = np.where(np.diff(sample_numbers) > min_delta) + (splits,) = np.where(np.diff(timestamps) > min_delta) return list(zip(splits[:-1] + 1, splits[1:] + 1)) @@ -186,35 +194,49 @@ def convert_barcode(transition_times, states, baud_rate): return harp_time -def correct_outliers(harp_times): +def remove_outliers(start_samples, harp_times): """ - Corrects outliers in the Harp clock times + Removes outliers from the Harp clock times + + These outliers are caused by problems decoding + the Harp clock signal, leading to consecutive times that + do not increase by exactly 1. These will be removed from + the array of Harp times, so they will not be used + as anchor points during subsequent clock alignment. + + If the times jump to a new value and continue to + increase by 1, either due to a reset of the Harp clock + or a gap in the data, these will be ignored. Parameters ---------- + start_samples : np.array + Harp clock start times in seconds harp_times : np.array Harp clock times in seconds Returns ------- + corrected_start_samples : np.array + Corrected Harp clock times in seconds corrected_harp_times : np.array Corrected Harp clock times in seconds """ - diffs = np.abs(np.diff(harp_times)) + original_indices = np.arange(len(harp_times)) - # Find the indices of the outliers - outliers = np.where(diffs > 1)[0] + new_indices = np.concatenate( + [sub_array for sub_array + in np.split(original_indices, + np.where(np.diff(harp_times) != 1)[0]+1) + if len(sub_array) > 1]) + + num_outliers = len(original_indices) - len(new_indices) - if len(outliers) == 1: - warnings.warn("One outlier found in the decoded Harp clock. Correcting...") - elif len(outliers) > 1: + if num_outliers > 0: warnings.warn( - f"{len(outliers)} outliers found in the decoded Harp clock. Correcting..." + f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " + + "found in the decoded Harp clock. Removing..." ) - # Correct the outliers - for outlier in outliers: - harp_times[outlier + 1] = harp_times[outlier] + 1 - - return harp_times + return start_samples[new_indices], harp_times[new_indices] diff --git a/tests/test_clock.py b/tests/test_clock.py index dbecdb1..c231b16 100644 --- a/tests/test_clock.py +++ b/tests/test_clock.py @@ -7,55 +7,56 @@ testinput = [ { # contains two valid Harp clock barcodes - 'sample_numbers': np.array([ - 0, 28832, 28892, 28922, 28952, 29012, 29072, 29132, 29192, - 29252, 29282, 29312, 29402, 29432, 29492, 29522, 29552, 29642, - 29702, 29732, 30002, 58835, 58865, 58925, 58955, 59015, 59075, - 59135, 59195, 59255, 59285, 59315, 59405, 59435, 59495, 59525, - 59555, 59645, 59705, 59735, 60005, 88838, 88928, 89018, 89078, - 89138, 89198, 89258, 89288, 89318, 89408, 89438, 89498, 89528, - 89558, 89648, 89708, 89738]), + 'timestamps': np.array([ + 0. , 0.96106667, 0.96306667, 0.96406667, 0.96506667, + 0.96706667, 0.96906667, 0.97106667, 0.97306667, 0.97506667, + 0.97606667, 0.97706667, 0.98006667, 0.98106667, 0.98306667, + 0.98406667, 0.98506667, 0.98806667, 0.99006667, 0.99106667, + 1.00006667, 1.96116667, 1.96216667, 1.96416667, 1.96516667, + 1.96716667, 1.96916667, 1.97116667, 1.97316667, 1.97516667, + 1.97616667, 1.97716667, 1.98016667, 1.98116667, 1.98316667, + 1.98416667, 1.98516667, 1.98816667, 1.99016667, 1.99116667, + 2.00016667, 2.96126667, 2.96426667, 2.96726667, 2.96926667, + 2.97126667, 2.97326667, 2.97526667, 2.97626667, 2.97726667, + 2.98026667, 2.98126667, 2.98326667, 2.98426667, 2.98526667, + 2.98826667, 2.99026667, 2.99126667]), 'states': np.array([ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), - 'sample_rate': 30000., - 'expected_start_samples': np.array([28832, 58835]), + 'expected_start_times': np.array([0.96106667, 1.96116667]), 'expected_harp_times': np.array([3806874, 3806875]) }, { - # contains one valid and one invalid Harp clock barcode - 'sample_numbers': np.array([ - 0, 28833, 29103, 29133, 29283, 29343, 29373, 29433, 29463, - 29553, 29613, 29643, 29703, 29733, 30003, 58835, 58865, 58895, - 59105, 59135, 59285, 59345, 59375, 59435, 59465, 59555, 59615, - 59645, 59705, 59735, 59885, 59945, 59975, 60036, 60065, 60156, - 60216, 60246, 60306, 60336, 60606, 88958]), + # contains 4 valid Harp clock barcodes and one invalid barcode + 'timestamps': np.array([ + 0.14036667, 1.10146667, 1.10246667, 1.10346667, 1.10446667, + 1.11146667, 1.11246667, 1.11646667, 1.11746667, 1.11846667, + 1.11946667, 1.12146667, 1.12246667, 1.12546667, 1.12746667, + 1.12846667, 1.13046667, 1.13146667, 1.14046667, 2.10156667, + 2.10356667, 2.11156667, 2.11256667, 2.11656667, 2.11756667, + 2.11856667, 2.11956667, 2.12156667, 2.12256667, 2.12556667, + 2.12756667, 2.12856667, 2.13056667, 2.13156667, 2.14056667, + 3.10163333, 3.10263333, 3.11163333, 3.11263333, 3.11663333, + 3.11763333, 3.11863333, 3.11963333, 3.12163333, 3.12263333, + 3.12563333, 3.12763333, 3.12863333, 3.13063333, 3.13163333, + 3.14063333, 4.10173333, 4.11073333, 4.11173333, 4.11673333, + 4.11873333, 4.11973333, 4.12173333, 4.12273333, 4.12573333, + 4.12773333, 4.12873333, 4.13073333, 4.13173333, 4.14073333, + 5.1018 , 5.1028 , 5.1038 , 5.1108 , 5.1118 , + 5.1168 , 5.1188 , 5.1198 , 5.1218 , 5.1228 , + 5.1258 , 5.1278 , 5.1288 , 5.1308 , 5.1318 , + 5.1368 , 5.1388 , 5.1398 , 5.14183333, 5.1428 , + 5.14583333, 5.14783333, 5.14883333, 5.15083333, 5.15183333, + 5.16083333, 6.1059 ]), 'states': np.array([ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), - 'sample_rate': 30000., - 'expected_start_samples': np.array([28833, 58835]), - 'expected_harp_times': np.array([2600960, 2600961]) - }, - { - # same as above, but with values in seconds - 'sample_numbers': np.array([ - 0. , 0.9611 , 0.9701 , 0.9711 , 0.9761 , - 0.9781 , 0.9791 , 0.9811 , 0.9821 , 0.9851 , - 0.9871 , 0.9881 , 0.9901 , 0.9911 , 1.0001 , - 1.96116667, 1.96216667, 1.96316667, 1.97016667, 1.97116667, - 1.97616667, 1.97816667, 1.97916667, 1.98116667, 1.98216667, - 1.98516667, 1.98716667, 1.98816667, 1.99016667, 1.99116667, - 1.99616667, 1.99816667, 1.99916667, 2.0012 , 2.00216667, - 2.0052 , 2.0072 , 2.0082 , 2.0102 , 2.0112 , - 2.0202 , 2.96526667]), - 'states': np.array([ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), - 'sample_rate' : 1, - 'expected_start_samples': np.array([0.9611, 1.96116667]), - 'expected_harp_times': np.array([2600960, 2600961]) + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0]), + 'expected_start_times': np.array([1.10146667, 2.10156667, 3.10163333, 4.10173333]), + 'expected_harp_times': np.array([2600957, 2600958, 2600959, 2600960]) } ] # fmt: on @@ -66,22 +67,21 @@ def test_create_reader(test_input): with warnings.catch_warnings(): warnings.simplefilter("ignore") - start_samples, harp_times = decode_harp_clock( - test_input["sample_numbers"], + start_times, harp_times = decode_harp_clock( + test_input["timestamps"], test_input["states"], - sample_rate=test_input["sample_rate"], baud_rate=1000, ) - assert np.allclose(start_samples, test_input["expected_start_samples"]) + assert np.allclose(start_times, test_input["expected_start_times"]) assert np.allclose(harp_times, test_input["expected_harp_times"]) # test alignment for samples 1/2 second after anchors - ts = start_samples + test_input["sample_rate"] * 0.5 - aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times) + ts = start_times + 0.5 + aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times) assert np.allclose(aligned_times, harp_times + 0.5) # test alignment for samples 1/2 second before anchors - ts = start_samples - test_input["sample_rate"] * 0.5 - aligned_times = align_timestamps_to_harp_clock(ts, start_samples, harp_times) + ts = start_times - 0.5 + aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times) assert np.allclose(aligned_times, harp_times - 0.5) From 3644013e9108e1c4db8d9ef44347a669477275e7 Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Mon, 29 Apr 2024 10:22:34 -0700 Subject: [PATCH 5/7] Sample numbers --> timestamps --- harp/clock.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index 54fef48..1b0ede5 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -28,7 +28,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) Returns ------- aligned_times : np.array - Aligned timestamps or sample numbers + Aligned timestamps """ if len(start_times) != len(harp_times): @@ -194,7 +194,7 @@ def convert_barcode(transition_times, states, baud_rate): return harp_time -def remove_outliers(start_samples, harp_times): +def remove_outliers(start_times, harp_times): """ Removes outliers from the Harp clock times @@ -210,14 +210,14 @@ def remove_outliers(start_samples, harp_times): Parameters ---------- - start_samples : np.array + start_times : np.array Harp clock start times in seconds harp_times : np.array Harp clock times in seconds Returns ------- - corrected_start_samples : np.array + corrected_start_times : np.array Corrected Harp clock times in seconds corrected_harp_times : np.array Corrected Harp clock times in seconds @@ -239,4 +239,4 @@ def remove_outliers(start_samples, harp_times): "found in the decoded Harp clock. Removing..." ) - return start_samples[new_indices], harp_times[new_indices] + return start_times[new_indices], harp_times[new_indices] From 0b0be8a86cbfcbdd1d8b881e8461977359fbb673 Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Mon, 29 Apr 2024 10:22:57 -0700 Subject: [PATCH 6/7] Apply formatting --- harp/clock.py | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index 1b0ede5..201984b 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -63,9 +63,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) return aligned_times -def decode_harp_clock( - timestamps, states, baud_rate=1000.0 -): +def decode_harp_clock(timestamps, states, baud_rate=1000.0): """ Decodes Harp clock times (in seconds) from a sequence of local event timestamps and states. @@ -113,23 +111,26 @@ def decode_harp_clock( """ min_delta = 0.5 # seconds -- Harp clock events must always be - # at least this far apart + # at least this far apart barcode_edges = get_barcode_edges(timestamps, min_delta) start_times = np.array([timestamps[edges[0]] for edges in barcode_edges]) - harp_times = np.array([ - convert_barcode( - timestamps[edges[0] : edges[1]], - states[edges[0] : edges[1]], - baud_rate=baud_rate, - ) - for edges in barcode_edges - ]) + harp_times = np.array( + [ + convert_barcode( + timestamps[edges[0] : edges[1]], + states[edges[0] : edges[1]], + baud_rate=baud_rate, + ) + for edges in barcode_edges + ] + ) - start_times_corrected, harp_times_corrected = \ - remove_outliers(start_times, harp_times) + start_times_corrected, harp_times_corrected = remove_outliers( + start_times, harp_times + ) return start_times_corrected, harp_times_corrected @@ -201,7 +202,7 @@ def remove_outliers(start_times, harp_times): These outliers are caused by problems decoding the Harp clock signal, leading to consecutive times that do not increase by exactly 1. These will be removed from - the array of Harp times, so they will not be used + the array of Harp times, so they will not be used as anchor points during subsequent clock alignment. If the times jump to a new value and continue to @@ -226,17 +227,21 @@ def remove_outliers(start_times, harp_times): original_indices = np.arange(len(harp_times)) new_indices = np.concatenate( - [sub_array for sub_array - in np.split(original_indices, - np.where(np.diff(harp_times) != 1)[0]+1) - if len(sub_array) > 1]) - + [ + sub_array + for sub_array in np.split( + original_indices, np.where(np.diff(harp_times) != 1)[0] + 1 + ) + if len(sub_array) > 1 + ] + ) + num_outliers = len(original_indices) - len(new_indices) if num_outliers > 0: warnings.warn( - f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " + - "found in the decoded Harp clock. Removing..." + f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " + + "found in the decoded Harp clock. Removing..." ) return start_times[new_indices], harp_times[new_indices] From 640d08559c1845a2dca92b7cb2f7e38927149978 Mon Sep 17 00:00:00 2001 From: Josh Siegle Date: Wed, 1 May 2024 11:19:01 -0700 Subject: [PATCH 7/7] Make function name more general --- harp/clock.py | 15 ++++++++------- tests/test_clock.py | 6 +++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/harp/clock.py b/harp/clock.py index 201984b..f22431f 100644 --- a/harp/clock.py +++ b/harp/clock.py @@ -2,9 +2,9 @@ import warnings -def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times): +def align_timestamps_to_anchor_points(timestamps_to_align, start_times, anchor_times): """ - Aligns a set of timestamps to the Harp clock. + Aligns a set of timestamps to anchor points (e.g., Harp clock times) We assume these timestamps are acquired by a system that is not aligned to the Harp clock. This function finds the nearest @@ -21,8 +21,9 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) start_times : np.array Local start times of each second in the Harp clock (output by `decode_harp_clock`) - harp_times : np.array - Harp clock times in seconds + anchor_times : np.array + Global clock times in seconds (typically the Harp clock times, + but can be any set of anchor points to align to) (output by `decode_harp_clock`) Returns @@ -31,7 +32,7 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) Aligned timestamps """ - if len(start_times) != len(harp_times): + if len(start_times) != len(anchor_times): raise ValueError( "The number of start times must equal the number of Harp times" ) @@ -43,12 +44,12 @@ def align_timestamps_to_harp_clock(timestamps_to_align, start_times, harp_times) # compute overall slope and offset A = np.vstack([start_times, np.ones(len(start_times))]).T - slopes[0], offsets[0] = np.linalg.lstsq(A, harp_times, rcond=None)[0] + slopes[0], offsets[0] = np.linalg.lstsq(A, anchor_times, rcond=None)[0] # compute slope and offset for each segment for i in range(N): x = start_times[i : i + 2] - y = harp_times[i : i + 2] + y = anchor_times[i : i + 2] A = np.vstack([x, np.ones(len(x))]).T slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0] diff --git a/tests/test_clock.py b/tests/test_clock.py index c231b16..5b7dd7f 100644 --- a/tests/test_clock.py +++ b/tests/test_clock.py @@ -1,6 +1,6 @@ import numpy as np from pytest import mark -from harp.clock import decode_harp_clock, align_timestamps_to_harp_clock +from harp.clock import decode_harp_clock, align_timestamps_to_anchor_points import warnings # fmt: off @@ -78,10 +78,10 @@ def test_create_reader(test_input): # test alignment for samples 1/2 second after anchors ts = start_times + 0.5 - aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times) + aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times) assert np.allclose(aligned_times, harp_times + 0.5) # test alignment for samples 1/2 second before anchors ts = start_times - 0.5 - aligned_times = align_timestamps_to_harp_clock(ts, start_times, harp_times) + aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times) assert np.allclose(aligned_times, harp_times - 0.5)