diff --git a/harp/clock.py b/harp/clock.py new file mode 100644 index 0000000..f22431f --- /dev/null +++ b/harp/clock.py @@ -0,0 +1,248 @@ +import numpy as np +import warnings + + +def align_timestamps_to_anchor_points(timestamps_to_align, start_times, anchor_times): + """ + Aligns a set of timestamps to anchor points (e.g., Harp clock times) + + We assume these timestamps are acquired by a system that is + not aligned to the Harp clock. This function finds the nearest + anchor point in the Harp clock for each timestamp, and then + interpolates between the anchor points to align the timestamps. + + `decode_harp_clock` must be run first, in order to find the + start of each second in the Harp clock. + + Parameters + ---------- + timestamps_to_align : np.array + Local timestamps (in seconds) to align to the Harp clock + start_times : np.array + Local start times of each second in the Harp clock + (output by `decode_harp_clock`) + anchor_times : np.array + Global clock times in seconds (typically the Harp clock times, + but can be any set of anchor points to align to) + (output by `decode_harp_clock`) + + Returns + ------- + aligned_times : np.array + Aligned timestamps + """ + + if len(start_times) != len(anchor_times): + raise ValueError( + "The number of start times must equal the number of Harp times" + ) + + N = len(start_times) + + slopes = np.zeros((N + 1,)) + offsets = np.zeros((N + 1,)) + + # compute overall slope and offset + A = np.vstack([start_times, np.ones(len(start_times))]).T + slopes[0], offsets[0] = np.linalg.lstsq(A, anchor_times, rcond=None)[0] + + # compute slope and offset for each segment + for i in range(N): + x = start_times[i : i + 2] + y = anchor_times[i : i + 2] + A = np.vstack([x, np.ones(len(x))]).T + slopes[i + 1], offsets[i + 1] = np.linalg.lstsq(A, y, rcond=None)[0] + + # find the nearest anchor point for each timestamp to align + nearest = np.searchsorted(start_times, timestamps_to_align, side="left") + + nearest[nearest == N] = 0 + + # interpolate between the anchor points + aligned_times = timestamps_to_align * slopes[nearest] + offsets[nearest] + + return aligned_times + + +def decode_harp_clock(timestamps, states, baud_rate=1000.0): + """ + Decodes Harp clock times (in seconds) from a sequence of local + event timestamps and states. + + The Harp Behavior board can be configured to output a digital + signal that encodes the current Harp time as a 32-bit integer, + which is emitted once per second. + + The format of the signal is as follows: + + Default value: high + Start bit: low -- indicates the transition to the next second + 8 bits: byte 0, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 1, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 2, with least significant bit first + 2 bits: high / low transition + 8 bits: byte 3, with least significant bit first + Final bit: reset to high + + Although the baud rate of the internal Harp clock is + 100 kHz, the Behavior board outputs the clock signal + at a lower baud rate (typically 1 kHz), so it can be + acquired by data acquisition systems with sample rates + as low as 5 kHz. + + Parameters + ---------- + timestamps : np.array + Float times in seconds for each clock line transition + If the acquisition system outputs integer sample numbers + for each event, divide by the sample rate to convert to seconds + states : np.array + States (1 or 0) for each clock line transition + baud_rate : float + The baud rate of the Harp clock signal + + Returns + ------- + start_times : np.array + Timestamps at which each second begins + harp_times : np.array + Harp clock times in seconds + """ + + min_delta = 0.5 # seconds -- Harp clock events must always be + # at least this far apart + + barcode_edges = get_barcode_edges(timestamps, min_delta) + + start_times = np.array([timestamps[edges[0]] for edges in barcode_edges]) + + harp_times = np.array( + [ + convert_barcode( + timestamps[edges[0] : edges[1]], + states[edges[0] : edges[1]], + baud_rate=baud_rate, + ) + for edges in barcode_edges + ] + ) + + start_times_corrected, harp_times_corrected = remove_outliers( + start_times, harp_times + ) + + return start_times_corrected, harp_times_corrected + + +def get_barcode_edges(timestamps, min_delta): + """ + Returns the start and end indices of each barcode + + Parameters + ---------- + timestamps : np.array + Timestamps (ins seconds) of clock line events + (high and low states) + min_delta : int + The minimum length between the end of one + barcode and the start of the next + + Returns + ------- + edges : list of tuples + Contains the start and end indices of each barcode + """ + + (splits,) = np.where(np.diff(timestamps) > min_delta) + + return list(zip(splits[:-1] + 1, splits[1:] + 1)) + + +def convert_barcode(transition_times, states, baud_rate): + """ + Converts Harp clock barcode to a clock time in seconds + + Parameters + ---------- + transition_times : np.array + Times (in seconds) each clock line transition + states : np.array + states (1 or 0) for each clock line transition + baud_rate : float + The baud rate of the clock signal + + Returns + ------- + harp_time : int + Harp time in seconds for the current barcode + + """ + + intervals = np.round(np.diff(transition_times * baud_rate)).astype("int") + + barcode = np.concatenate( + [np.ones((count,)) * state for state, count in zip(states[:-1], intervals)] + ).astype("int") + + val = np.concatenate( + (np.arange(1, 9), np.arange(11, 19), np.arange(21, 29), np.arange(31, 39)) + ) + + s = np.flip(barcode[val]) + harp_time = s.dot(2 ** np.arange(s.size)[::-1]) + + return harp_time + + +def remove_outliers(start_times, harp_times): + """ + Removes outliers from the Harp clock times + + These outliers are caused by problems decoding + the Harp clock signal, leading to consecutive times that + do not increase by exactly 1. These will be removed from + the array of Harp times, so they will not be used + as anchor points during subsequent clock alignment. + + If the times jump to a new value and continue to + increase by 1, either due to a reset of the Harp clock + or a gap in the data, these will be ignored. + + Parameters + ---------- + start_times : np.array + Harp clock start times in seconds + harp_times : np.array + Harp clock times in seconds + + Returns + ------- + corrected_start_times : np.array + Corrected Harp clock times in seconds + corrected_harp_times : np.array + Corrected Harp clock times in seconds + """ + + original_indices = np.arange(len(harp_times)) + + new_indices = np.concatenate( + [ + sub_array + for sub_array in np.split( + original_indices, np.where(np.diff(harp_times) != 1)[0] + 1 + ) + if len(sub_array) > 1 + ] + ) + + num_outliers = len(original_indices) - len(new_indices) + + if num_outliers > 0: + warnings.warn( + f"{num_outliers} outlier{'s' if num_outliers > 1 else ''} " + + "found in the decoded Harp clock. Removing..." + ) + + return start_times[new_indices], harp_times[new_indices] diff --git a/tests/test_clock.py b/tests/test_clock.py new file mode 100644 index 0000000..5b7dd7f --- /dev/null +++ b/tests/test_clock.py @@ -0,0 +1,87 @@ +import numpy as np +from pytest import mark +from harp.clock import decode_harp_clock, align_timestamps_to_anchor_points +import warnings + +# fmt: off +testinput = [ + { + # contains two valid Harp clock barcodes + 'timestamps': np.array([ + 0. , 0.96106667, 0.96306667, 0.96406667, 0.96506667, + 0.96706667, 0.96906667, 0.97106667, 0.97306667, 0.97506667, + 0.97606667, 0.97706667, 0.98006667, 0.98106667, 0.98306667, + 0.98406667, 0.98506667, 0.98806667, 0.99006667, 0.99106667, + 1.00006667, 1.96116667, 1.96216667, 1.96416667, 1.96516667, + 1.96716667, 1.96916667, 1.97116667, 1.97316667, 1.97516667, + 1.97616667, 1.97716667, 1.98016667, 1.98116667, 1.98316667, + 1.98416667, 1.98516667, 1.98816667, 1.99016667, 1.99116667, + 2.00016667, 2.96126667, 2.96426667, 2.96726667, 2.96926667, + 2.97126667, 2.97326667, 2.97526667, 2.97626667, 2.97726667, + 2.98026667, 2.98126667, 2.98326667, 2.98426667, 2.98526667, + 2.98826667, 2.99026667, 2.99126667]), + 'states': np.array([ + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]), + 'expected_start_times': np.array([0.96106667, 1.96116667]), + 'expected_harp_times': np.array([3806874, 3806875]) + }, + { + # contains 4 valid Harp clock barcodes and one invalid barcode + 'timestamps': np.array([ + 0.14036667, 1.10146667, 1.10246667, 1.10346667, 1.10446667, + 1.11146667, 1.11246667, 1.11646667, 1.11746667, 1.11846667, + 1.11946667, 1.12146667, 1.12246667, 1.12546667, 1.12746667, + 1.12846667, 1.13046667, 1.13146667, 1.14046667, 2.10156667, + 2.10356667, 2.11156667, 2.11256667, 2.11656667, 2.11756667, + 2.11856667, 2.11956667, 2.12156667, 2.12256667, 2.12556667, + 2.12756667, 2.12856667, 2.13056667, 2.13156667, 2.14056667, + 3.10163333, 3.10263333, 3.11163333, 3.11263333, 3.11663333, + 3.11763333, 3.11863333, 3.11963333, 3.12163333, 3.12263333, + 3.12563333, 3.12763333, 3.12863333, 3.13063333, 3.13163333, + 3.14063333, 4.10173333, 4.11073333, 4.11173333, 4.11673333, + 4.11873333, 4.11973333, 4.12173333, 4.12273333, 4.12573333, + 4.12773333, 4.12873333, 4.13073333, 4.13173333, 4.14073333, + 5.1018 , 5.1028 , 5.1038 , 5.1108 , 5.1118 , + 5.1168 , 5.1188 , 5.1198 , 5.1218 , 5.1228 , + 5.1258 , 5.1278 , 5.1288 , 5.1308 , 5.1318 , + 5.1368 , 5.1388 , 5.1398 , 5.14183333, 5.1428 , + 5.14583333, 5.14783333, 5.14883333, 5.15083333, 5.15183333, + 5.16083333, 6.1059 ]), + 'states': np.array([ + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0]), + 'expected_start_times': np.array([1.10146667, 2.10156667, 3.10163333, 4.10173333]), + 'expected_harp_times': np.array([2600957, 2600958, 2600959, 2600960]) + } +] +# fmt: on + + +@mark.parametrize("test_input", testinput) +def test_create_reader(test_input): + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + start_times, harp_times = decode_harp_clock( + test_input["timestamps"], + test_input["states"], + baud_rate=1000, + ) + + assert np.allclose(start_times, test_input["expected_start_times"]) + assert np.allclose(harp_times, test_input["expected_harp_times"]) + + # test alignment for samples 1/2 second after anchors + ts = start_times + 0.5 + aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times) + assert np.allclose(aligned_times, harp_times + 0.5) + + # test alignment for samples 1/2 second before anchors + ts = start_times - 0.5 + aligned_times = align_timestamps_to_anchor_points(ts, start_times, harp_times) + assert np.allclose(aligned_times, harp_times - 0.5)