From ac492567d8d3e9a7d71d6d7c249f0a1e4a0f25c5 Mon Sep 17 00:00:00 2001 From: glopesdev Date: Wed, 19 Mar 2025 15:30:41 +0000 Subject: [PATCH 1/5] Add uv environment to gitignore --- .gitignore | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 520b87b..28aa57a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,23 @@ -# Environment files -.vscode -.venv - -# Python temp files -__pycache__ -*.pyc -*.egg-info -dist - -# Data files -/data \ No newline at end of file +# Byte-compiled / optimized / DLL files +__pycache__/ + +# Distribution / packaging +dist/ +_version.py +*.egg-info/ +*.egg + +# IDE +.vscode/* + +# misc +log*.txt +scratch/ +scratch*.py + +# Test +.coverage + +# Environment +.venv/ +uv.lock \ No newline at end of file From a331cea886ddac69cf00db4e67bc9667c7917042 Mon Sep 17 00:00:00 2001 From: glopesdev Date: Thu, 20 Mar 2025 00:48:23 +0000 Subject: [PATCH 2/5] Allow formatting data as binary protocol messages --- harp/io.py | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 4 deletions(-) diff --git a/harp/io.py b/harp/io.py index aefa514..219e063 100644 --- a/harp/io.py +++ b/harp/io.py @@ -24,8 +24,9 @@ class MessageType(IntEnum): _SECONDS_PER_TICK = 32e-6 +_PAYLOAD_TIMESTAMP_MASK = 0x10 _messagetypes = [type.name for type in MessageType] -_payloadtypes = { +_dtypefrompayloadtype = { 1: np.dtype(np.uint8), 2: np.dtype(np.uint16), 4: np.dtype(np.uint32), @@ -36,6 +37,7 @@ class MessageType(IntEnum): 136: np.dtype(np.int64), 68: np.dtype(np.float32), } +_payloadtypefromdtype = {v: k for k, v in _dtypefrompayloadtype.items()} def read( @@ -140,20 +142,20 @@ def _fromraw( nrows = len(data) // stride payloadtype = data[4] payloadoffset = 5 - if payloadtype & 0x10 != 0: + if payloadtype & _PAYLOAD_TIMESTAMP_MASK != 0: seconds = np.ndarray(nrows, dtype=np.uint32, buffer=data, offset=payloadoffset, strides=stride) payloadoffset += 4 micros = np.ndarray(nrows, dtype=np.uint16, buffer=data, offset=payloadoffset, strides=stride) payloadoffset += 2 time = micros * _SECONDS_PER_TICK + seconds - payloadtype = payloadtype & ~np.uint8(0x10) + payloadtype = payloadtype & ~np.uint8(_PAYLOAD_TIMESTAMP_MASK) if epoch is not None: time = epoch + pd.to_timedelta(time, "s") # type: ignore index = pd.Series(time) index.name = "Time" payloadsize = stride - payloadoffset - 1 - payloadtype = _payloadtypes[payloadtype] + payloadtype = _dtypefrompayloadtype[payloadtype] if dtype is not None and dtype != payloadtype: raise ValueError(f"expected payload type {dtype} but got {payloadtype}") @@ -176,3 +178,127 @@ def _fromraw( msgtype = pd.Categorical.from_codes(msgtype, categories=_messagetypes) # type: ignore result[MessageType.__name__] = msgtype return result + + +def write( + file: Union[str, bytes, PathLike[Any], BinaryIO], + data: pd.DataFrame, + address: int, + dtype: Optional[np.dtype] = None, + port: Optional[int] = None, + epoch: Optional[datetime] = None, + message_type: Optional[MessageType] = None, +): + """Write single-register Harp data to the specified file. + + Parameters + ---------- + file + Open file object or filename where to store binary data from + a single device register. + data + Pandas data frame containing message payload. + address + Register address used to identify all formatted Harp messages. + dtype + Data type of the register payload. If specified, all data will + be converted before formatting the binary payload. + port + Optional port value used for all formatted Harp messages. + epoch + Reference datetime at which time zero begins. If specified, + the input data frame must have a datetime index. + message_type + Optional message type used for all formatted Harp messages. + If not specified, data must contain a MessageType column. + """ + buffer = format(data, address, dtype, port, epoch, message_type) + buffer.tofile(file) + + +def format( + data: pd.DataFrame, + address: int, + dtype: Optional[np.dtype] = None, + port: Optional[int] = None, + epoch: Optional[datetime] = None, + message_type: Optional[MessageType] = None, +) -> npt.NDArray[np.uint8]: + """Format single-register Harp data as a flat binary buffer. + + Parameters + ---------- + data + Pandas data frame containing message payload. + address + Register address used to identify all formatted Harp messages. + dtype + Data type of the register payload. If specified, all data will + be converted before formatting the binary payload. + port + Optional port value used for all formatted Harp messages. + epoch + Reference datetime at which time zero begins. If specified, + the input data frame must have a datetime index. + message_type + Optional message type used for all formatted Harp messages. + If not specified, data must contain a MessageType column. + + Returns + ------- + An array object containing message data formatted according + to the Harp binary protocol. + """ + if len(data) == 0: + return np.empty(0, dtype=np.uint8) + + if "MessageType" in data.columns: + msgtype = data["MessageType"].cat.codes + payload = data.iloc[:, 0:-1].values + elif message_type is not None: + msgtype = message_type + payload = data.values + else: + raise ValueError(f"message type must be specified either in the data or as argument") + + time = data.index + is_timestamped = True + if epoch is not None: + if not isinstance(time, pd.DatetimeIndex): + raise ValueError(f"expected datetime index to encode with epoch but got {time.inferred_type}") + time = (time - epoch).total_seconds() + elif isinstance(time, pd.RangeIndex): + is_timestamped = False + + if dtype is not None: + payload = payload.astype(dtype) + + if port is None: + port = 255 + + payloadtype = _payloadtypefromdtype[payload.dtype] + payloadlength = payload.shape[1] * payload.dtype.itemsize + stride = payloadlength + 6 + if is_timestamped: + payloadtype |= _PAYLOAD_TIMESTAMP_MASK + stride += 6 + + nrows = len(data) + buffer = np.empty((nrows, stride), dtype=np.uint8) + buffer[:, 0] = msgtype + buffer[:, 1:5] = [stride - 2, address, port, payloadtype] + + payloadoffset = 5 + if is_timestamped: + seconds = time.astype(np.uint32) + micros = np.around(((time - seconds) / _SECONDS_PER_TICK).values).astype(np.uint16) + buffer[:, 5:9] = np.ndarray((nrows, 4), dtype=np.uint8, buffer=seconds.values) + buffer[:, 9:11] = np.ndarray((nrows, 2), dtype=np.uint8, buffer=micros) + payloadoffset += 6 + + payloadstop = payloadoffset + payloadlength + buffer[:, payloadoffset:payloadstop] = np.ndarray( + (nrows, payloadlength), dtype=np.uint8, buffer=np.ascontiguousarray(payload) + ) + buffer[:, -1] = np.sum(buffer[:, 0:-1], axis=1, dtype=np.uint8) + return buffer.reshape(-1) From 895c74dec66cb8f6662356464f122819a51be202 Mon Sep 17 00:00:00 2001 From: glopesdev Date: Thu, 20 Mar 2025 00:48:49 +0000 Subject: [PATCH 3/5] Add round-trip regression tests for binary format --- tests/test_io.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/test_io.py b/tests/test_io.py index d38e90c..ddb0a44 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -4,7 +4,7 @@ import pytest from pytest import mark -from harp.io import MessageType, parse, read +from harp.io import MessageType, format, parse, read from tests.params import DataFileParam testdata = [ @@ -63,3 +63,26 @@ def test_read(dataFile: DataFileParam): if dataFile.expected_cols: for col in dataFile.expected_cols: assert col in data.columns + + +writedata = [ + DataFileParam(path="data/device_0.bin", expected_rows=1, expected_address=0, keep_type=True), +] + + +@mark.parametrize("dataFile", writedata) +def test_write(dataFile: DataFileParam): + if dataFile.expected_address is None: + raise AssertionError("expected address must be defined for all write tests") + + buffer = np.fromfile(dataFile.path, np.uint8) + data = parse( + buffer, + address=dataFile.expected_address, + dtype=dataFile.expected_dtype, + length=dataFile.expected_length, + keep_type=dataFile.keep_type, + ) + assert len(data) == dataFile.expected_rows + write_buffer = format(data, address=dataFile.expected_address) + assert np.array_equal(buffer, write_buffer) From 8c6b4799ead7cfa1a549933c74d1cdb310d72066 Mon Sep 17 00:00:00 2001 From: glopesdev Date: Thu, 20 Mar 2025 01:00:51 +0000 Subject: [PATCH 4/5] Ensure package name is set --- harp/schema.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/harp/schema.py b/harp/schema.py index cee1532..d60b797 100644 --- a/harp/schema.py +++ b/harp/schema.py @@ -8,6 +8,9 @@ def _read_common_registers() -> Registers: + if __package__ is None: + raise ValueError("__package__ is None: unable to read common registers") + file = resources.files(__package__) / "common.yml" with file.open("r") as fileIO: return parse_yaml_raw_as(Registers, fileIO.read()) From e4130f83c9a41a1afc11f1f44639c88b2ca7b6cc Mon Sep 17 00:00:00 2001 From: glopesdev Date: Thu, 20 Mar 2025 23:19:11 +0000 Subject: [PATCH 5/5] Avoid unnecessary copies and shape assumptions --- harp/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harp/io.py b/harp/io.py index 219e063..75ec78b 100644 --- a/harp/io.py +++ b/harp/io.py @@ -254,7 +254,7 @@ def format( if "MessageType" in data.columns: msgtype = data["MessageType"].cat.codes - payload = data.iloc[:, 0:-1].values + payload = data[data.columns.drop("MessageType")].values elif message_type is not None: msgtype = message_type payload = data.values @@ -271,7 +271,7 @@ def format( is_timestamped = False if dtype is not None: - payload = payload.astype(dtype) + payload = payload.astype(dtype, copy=False) if port is None: port = 255