From a97227f9d0eeed7abc632e149eb37400b0212601 Mon Sep 17 00:00:00 2001 From: Patrick Upson Date: Thu, 19 May 2022 07:25:26 -0300 Subject: [PATCH 1/7] added stream support to from_btl for issue #142 --- ctd/read.py | 8 ++++++-- tests/test_read.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ctd/read.py b/ctd/read.py index ac277e4..c89f5b5 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -61,6 +61,9 @@ def _open_compressed(fname): def _read_file(fname): """Read file contents.""" + if isinstance(fname, StringIO): + return fname + if not isinstance(fname, Path): fname = Path(fname).resolve() @@ -212,7 +215,7 @@ def from_bl(fname): return df -def from_btl(fname): +def from_btl(fname, name=None): """ DataFrame constructor to open Seabird CTD BTL-ASCII format. @@ -261,7 +264,8 @@ def from_btl(fname): df["Statistic"] = df["Statistic"].str.replace(r"\(|\)", "") # (avg) to avg - name = _basename(fname)[1] + if name is None: + name = _basename(fname)[1] dtypes = { "bpos": int, diff --git a/tests/test_read.py b/tests/test_read.py index ca98459..0e47b48 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -58,6 +58,13 @@ def btl_duplicate_header_name(): yield ctd.from_btl(data_path.joinpath("btl", "alt_bottletest.BTL")) +@pytest.fixture +def btl_as_stream(): + file = open(mode="rb", file=data_path.joinpath("btl", "alt_bottletest.BTL")) + stream = StringIO(file.read().decode('cp1252')) + yield ctd.from_btl(stream, "alt_bottletest") + + @pytest.fixture def ros(): yield ctd.rosette_summary(data_path.joinpath("CTD", "g01l03s01m-m2.ros")) @@ -89,6 +96,11 @@ def test_btl_with_dup_cols(btl_duplicate_header_name): ) +def test_btl_as_stringio(btl_as_stream): + assert isinstance(btl_as_stream, pd.DataFrame) + assert not btl_as_stream.empty + + def test_ros_is_dataframe(ros): assert isinstance(ros, pd.DataFrame) assert not ros.empty From 8fce656ee0465796572fc22870fe59527abff333 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 May 2022 10:26:59 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_read.py b/tests/test_read.py index 0e47b48..6067ade 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -61,7 +61,7 @@ def btl_duplicate_header_name(): @pytest.fixture def btl_as_stream(): file = open(mode="rb", file=data_path.joinpath("btl", "alt_bottletest.BTL")) - stream = StringIO(file.read().decode('cp1252')) + stream = StringIO(file.read().decode("cp1252")) yield ctd.from_btl(stream, "alt_bottletest") From 7c21aa953369e75ea362d78884fbc1ea6bf257f7 Mon Sep 17 00:00:00 2001 From: Patrick Upson Date: Thu, 19 May 2022 13:48:05 -0300 Subject: [PATCH 3/7] fixed _read_file doc_string and implemented a string copy --- ctd/read.py | 6 ++++-- tests/test_read.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ctd/read.py b/ctd/read.py index c89f5b5..9822342 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -60,9 +60,11 @@ def _open_compressed(fname): def _read_file(fname): - """Read file contents.""" + """Read file contents, or read from StringIO object.""" if isinstance(fname, StringIO): - return fname + fname.seek(0) + text = fname.read() + return StringIO(text) if not isinstance(fname, Path): fname = Path(fname).resolve() diff --git a/tests/test_read.py b/tests/test_read.py index 6067ade..4a87697 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -62,7 +62,7 @@ def btl_duplicate_header_name(): def btl_as_stream(): file = open(mode="rb", file=data_path.joinpath("btl", "alt_bottletest.BTL")) stream = StringIO(file.read().decode("cp1252")) - yield ctd.from_btl(stream, "alt_bottletest") + yield ctd.from_btl(stream) @pytest.fixture From 87e64fc1c1c9ff61dc904395b9b0be92f1799ccf Mon Sep 17 00:00:00 2001 From: Patrick Upson Date: Tue, 24 May 2022 07:30:34 -0300 Subject: [PATCH 4/7] Looking for file name in _parse_seabird method --- ctd/read.py | 11 ++++++++--- tests/test_read.py | 14 +++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/ctd/read.py b/ctd/read.py index 9822342..7d00a13 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -107,6 +107,7 @@ def _parse_seabird(lines, ftype): """Parse searbird formats.""" # Initialize variables. lon = lat = time = None, None, None + fname = None skiprows = 0 metadata = {} @@ -124,6 +125,9 @@ def _parse_seabird(lines, ftype): # Seabird headers starts with *. if line.startswith("*"): header.append(line) + if "FileName" in line: + file_path = line.split("=")[-1].strip() + fname = Path(file_path).stem # Seabird configuration starts with #. if line.startswith("#"): @@ -177,6 +181,7 @@ def _parse_seabird(lines, ftype): names.append("Statistic") metadata.update( { + "name": fname if fname else "default_file", "header": "\n".join(header), "config": "\n".join(config), "names": _remane_duplicate_columns(names), @@ -217,7 +222,7 @@ def from_bl(fname): return df -def from_btl(fname, name=None): +def from_btl(fname): """ DataFrame constructor to open Seabird CTD BTL-ASCII format. @@ -266,8 +271,9 @@ def from_btl(fname, name=None): df["Statistic"] = df["Statistic"].str.replace(r"\(|\)", "") # (avg) to avg - if name is None: + if "name" not in metadata: name = _basename(fname)[1] + metadata["name"] = str(name) dtypes = { "bpos": int, @@ -288,7 +294,6 @@ def from_btl(fname, name=None): warnings.warn("Could not convert %s to float." % column) df["Date"] = pd.to_datetime(df["Date"]) - metadata["name"] = str(name) setattr(df, "_metadata", metadata) return df diff --git a/tests/test_read.py b/tests/test_read.py index 4a87697..bf593ab 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -53,11 +53,6 @@ def btl(): yield ctd.from_btl(data_path.joinpath("btl", "bottletest.btl")) -@pytest.fixture -def btl_duplicate_header_name(): - yield ctd.from_btl(data_path.joinpath("btl", "alt_bottletest.BTL")) - - @pytest.fixture def btl_as_stream(): file = open(mode="rb", file=data_path.joinpath("btl", "alt_bottletest.BTL")) @@ -90,9 +85,9 @@ def test_btl_is_dataframe(btl): assert not btl.empty -def test_btl_with_dup_cols(btl_duplicate_header_name): +def test_btl_with_dup_cols(btl_as_stream): assert all( - col in btl_duplicate_header_name.columns for col in ["Bottle", "Bottle_"] + col in btl_as_stream.columns for col in ["Bottle", "Bottle_"] ) @@ -101,6 +96,11 @@ def test_btl_as_stringio(btl_as_stream): assert not btl_as_stream.empty +def test_btl_as_stringio_without_name(btl_as_stream): + assert isinstance(btl_as_stream, pd.DataFrame) + assert not btl_as_stream.empty + + def test_ros_is_dataframe(ros): assert isinstance(ros, pd.DataFrame) assert not ros.empty From d1b617e4ff7d4a37397861163e2cc5779ec981ba Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 May 2022 10:31:23 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_read.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_read.py b/tests/test_read.py index bf593ab..62fc3ce 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -86,9 +86,7 @@ def test_btl_is_dataframe(btl): def test_btl_with_dup_cols(btl_as_stream): - assert all( - col in btl_as_stream.columns for col in ["Bottle", "Bottle_"] - ) + assert all(col in btl_as_stream.columns for col in ["Bottle", "Bottle_"]) def test_btl_as_stringio(btl_as_stream): From 180f761dd0e645b3452b85cb4088f56e3ebd8153 Mon Sep 17 00:00:00 2001 From: Patrick Upson Date: Wed, 25 May 2022 18:37:45 -0300 Subject: [PATCH 6/7] Update ctd/read.py Co-authored-by: Filipe --- ctd/read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctd/read.py b/ctd/read.py index 7d00a13..e809736 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -181,7 +181,7 @@ def _parse_seabird(lines, ftype): names.append("Statistic") metadata.update( { - "name": fname if fname else "default_file", + "name": fname if fname else "unknown", "header": "\n".join(header), "config": "\n".join(config), "names": _remane_duplicate_columns(names), From b538acb7735425a7309a6842a6a05315d90c704b Mon Sep 17 00:00:00 2001 From: UpsonP Date: Tue, 31 May 2022 14:21:58 -0300 Subject: [PATCH 7/7] removed duplicate test --- tests/test_read.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_read.py b/tests/test_read.py index bf593ab..2bcb9ce 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -96,11 +96,6 @@ def test_btl_as_stringio(btl_as_stream): assert not btl_as_stream.empty -def test_btl_as_stringio_without_name(btl_as_stream): - assert isinstance(btl_as_stream, pd.DataFrame) - assert not btl_as_stream.empty - - def test_ros_is_dataframe(ros): assert isinstance(ros, pd.DataFrame) assert not ros.empty