From 15bbf19ed0a5751305373b0100b1164e2f8aa32c Mon Sep 17 00:00:00 2001 From: Theresa Mieslinger Date: Tue, 6 Aug 2024 23:48:03 +0200 Subject: [PATCH 1/4] refactor: explicitly create L1 in pipeline --- src/halodrops/helper/paths.py | 3 --- src/halodrops/pipeline.py | 11 ++++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/halodrops/helper/paths.py b/src/halodrops/helper/paths.py index 40a1ca1..28d0f23 100644 --- a/src/halodrops/helper/paths.py +++ b/src/halodrops/helper/paths.py @@ -139,9 +139,6 @@ def populate_sonde_instances(self) -> Dict: Sondes[sonde_id].add_flight_id(self.flight_id) Sondes[sonde_id].add_platform_id(self.platform_id) Sondes[sonde_id].add_afile(a_file) - if launch_detect: - Sondes[sonde_id].add_postaspenfile() - Sondes[sonde_id].add_aspen_ds() object.__setattr__(self, "Sondes", Sondes) diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index a81a947..8c16124 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -373,11 +373,20 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "apply": create_and_populate_flight_object, "output": ["platforms", "sondes"], }, - "qc": { + "create_L1": { "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", + "add_postaspenfile", + "add_aspen_ds", + ], + "output": "sondes", + }, + "qc": { + "intake": "sondes", + "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, + "functions": [ "detect_floater", "profile_fullness", "near_surface_coverage", From d51d6c13cbfcb0fa66d7e7680b4f73027f257a8d Mon Sep 17 00:00:00 2001 From: Theresa Mieslinger Date: Wed, 7 Aug 2024 00:50:58 +0200 Subject: [PATCH 2/4] add L0 to L1 processing via docker image if post-ASPEN file does not exist --- src/halodrops/pipeline.py | 2 +- src/halodrops/processor.py | 67 ++++++++++++++++++-------------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 8c16124..f6d6cc3 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -378,7 +378,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", - "add_postaspenfile", + "run_aspen", "add_aspen_ds", ], "output": "sondes", diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 6ab306a..914445a 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -3,6 +3,7 @@ import datetime from typing import Any, Optional, List import os +import subprocess import numpy as np import xarray as xr @@ -106,13 +107,13 @@ def add_afile(self, path_to_afile: str) -> None: object.__setattr__(self, "afile", path_to_afile) return self - def add_postaspenfile(self, path_to_postaspenfile: str = None) -> None: - """Sets attribute with path to post-ASPEN file of the sonde + def run_aspen(self, path_to_postaspenfile: str = None) -> None: + """Runs aspen and sets attribute with path to post-ASPEN file of the sonde If the A-file path is known for the sonde, i.e. if the attribute `path_to_afile` exists, then the function will attempt to look for a post-ASPEN file of the same date-time as in the A-file's name. Sometimes, the post-ASPEN file might not exist (e.g. because launch was not detected), and in - such cases, an exception will be raised. + such cases, ASPEN will run in a docker image and create the file. If the A-file path is not known for the sonde, the function will expect the argument `path_to_postaspenfile` to be not empty. @@ -122,47 +123,41 @@ def add_postaspenfile(self, path_to_postaspenfile: str = None) -> None: path_to_postaspenfile : str, optional The path to the post-ASPEN file. If not provided, the function will attempt to construct the path from the `afile` attribute. - Raises - ------ - ValueError - If the `afile` attribute does not exist when `path_to_postaspenfile` is not provided. - If the post-ASPEN file does not exist at the constructed or provided path, and launch was detected in the A-file. - If the launch was not detected in the A-file. - Attributes Set -------------- postaspenfile : str The path to the post-ASPEN file. This attribute is set if the file exists at the constructed or provided path. """ + l0dir = os.path.dirname(self.afile) + aname = os.path.basename(self.afile) + dname = "D" + aname[1:] + l1dir = l0dir[:-1] + "1" + l1name = dname.split(".")[0] + "QC.nc" + if path_to_postaspenfile is None: - if hasattr(self, "afile"): - path_to_l1dir = os.path.dirname(self.afile)[:-1] + "1" - postaspenfile = ( - "D" + os.path.basename(self.afile).split(".")[0][1:] + "QC.nc" - ) - path_to_postaspenfile = os.path.join(path_to_l1dir, postaspenfile) - if os.path.exists(path_to_postaspenfile): - object.__setattr__(self, "postaspenfile", path_to_postaspenfile) - else: - if rr.check_launch_detect_in_afile(self.afile): - raise ValueError( - f"The post-ASPEN file for {self.serial_id} with filename {postaspenfile} does not exist. Therefore, I am not setting the `postaspenfile` attribute. I checked and found that launch was detected for {self.serial_id}." - ) - else: - raise ValueError( - f"Launch not detected for {self.serial_id}. Therefore, {postaspenfile} does not exist and I am not setting the `postaspenfile` attribute." - ) - else: - raise ValueError("The attribute `path_to_afile` doesn't exist.") + path_to_postaspenfile = os.path.join(l1dir, l1name) + + if not os.path.exists(path_to_postaspenfile): + subprocess.run( + [ + "docker", + "run", + "--rm", + "--mount", + f"type=bind,source={l0dir},target=/input", + "--mount", + f"type=bind,source={l1dir},target=/output", + "ghcr.io/atmdrops/aspenqc:4.0.2", + "-i", + f"/input/{dname}", + "-n", + f"/output/{l1name}", + ], + check=True, + ) - else: - if os.path.exists(path_to_postaspenfile): - object.__setattr__(self, "postaspenfile", path_to_postaspenfile) - else: - raise ValueError( - f"The post-ASPEN file for your provided {path_to_postaspenfile=} does not exist. Therefore, I am not setting the `postaspenfile` attribute." - ) + object.__setattr__(self, "postaspenfile", path_to_postaspenfile) return self def add_aspen_ds(self) -> None: From e9f01a14c89dccd15efe3fd34f3fcf99d4e1e99f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Ko=CC=88lling?= Date: Wed, 7 Aug 2024 23:59:50 +0200 Subject: [PATCH 3/4] fix tests after rename add_postaspenfile -> run_aspen The check for adding the postaspenfile without launch detect isn't useful anymore, as doing so currently has no defined behavior and it's expected that the pipeline filters no launch detect sondes prior to calling run_aspen. The other tests should still work the same. NOTE: This change ONLY adapts the existing tests to the new function name. It does not test if the processing actually does useful things. Testing this properly would require more complete testdata, which will be available after #113 gets merged. --- tests/test_sonde.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/test_sonde.py b/tests/test_sonde.py index d0e467f..9447f87 100644 --- a/tests/test_sonde.py +++ b/tests/test_sonde.py @@ -87,25 +87,13 @@ def test_sonde_add_afile(temp_afile_launchdetected, temp_afile_nolaunchdetected) assert sonde.afile == temp_afile_nolaunchdetected -def test_sonde_add_postaspenfile_without_launch(temp_afile_nolaunchdetected): - """ - Test the addition of a post-ASPEN file when a launch has not been detected. - """ - sonde = Sonde(serial_id=s_id) - sonde.add_afile(temp_afile_nolaunchdetected) - with pytest.raises(ValueError): - sonde.add_postaspenfile() - - -def test_sonde_add_postaspenfile_with_only_afile( - temp_afile_launchdetected, temp_postaspenfile -): +def test_sonde_run_aspen_with_only_afile(temp_afile_launchdetected, temp_postaspenfile): """ Test the addition of a post-ASPEN file when an A-file has been added. """ sonde = Sonde(serial_id=s_id) sonde.add_afile(temp_afile_launchdetected) - sonde.add_postaspenfile() + sonde.run_aspen() assert sonde.postaspenfile == temp_postaspenfile @@ -115,7 +103,7 @@ def test_sonde_add_aspen_ds(temp_afile_launchdetected, temp_postaspenfile): """ sonde = Sonde(serial_id=s_id) sonde.add_afile(temp_afile_launchdetected) - sonde.add_postaspenfile(temp_postaspenfile) + sonde.run_aspen(temp_postaspenfile) sonde.add_aspen_ds() assert isinstance(sonde.aspen_ds, xr.Dataset) assert sonde.aspen_ds.attrs["SondeId"] == s_id @@ -129,6 +117,6 @@ def test_sonde_add_aspen_ds_with_mismatched_sonde_id( """ sonde = Sonde(serial_id=s_id[:-1]) sonde.add_afile(temp_afile_launchdetected) - sonde.add_postaspenfile(temp_postaspenfile) + sonde.run_aspen(temp_postaspenfile) with pytest.raises(ValueError): sonde.add_aspen_ds() From 712c2aea823f0beec99cd6756dd339c1f3b9e0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20Ko=CC=88lling?= Date: Thu, 8 Aug 2024 00:17:27 +0200 Subject: [PATCH 4/4] run_aspen: ensure Level_1 folder exists --- src/halodrops/processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 914445a..1f8e5cd 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -139,6 +139,7 @@ def run_aspen(self, path_to_postaspenfile: str = None) -> None: path_to_postaspenfile = os.path.join(l1dir, l1name) if not os.path.exists(path_to_postaspenfile): + os.makedirs(l1dir, exist_ok=True) subprocess.run( [ "docker",