From 70cbabf0352c3d8a5ac916d7941afac7d5528578 Mon Sep 17 00:00:00 2001 From: Geet George Date: Wed, 13 Dec 2023 15:49:13 +0100 Subject: [PATCH 01/12] add platform objects to output --- src/halodrops/pipeline.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 5800ec4..553c61d 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -196,7 +196,9 @@ def get_platforms(config): return platform_objects -def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flight: +def create_and_populate_flight_object( + config: configparser.ConfigParser, +) -> (dict[Platform], dict[Sonde]): """ Creates a Flight object and populates it with A-files. @@ -210,8 +212,9 @@ def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flig Flight A Flight object. """ - platform_objects = get_platforms(config) output = {} + platform_objects = get_platforms(config) + output["platforms"] = platform_objects output["sondes"] = {} for platform in platform_objects: for flight_id in platform_objects[platform].flight_ids: @@ -222,7 +225,7 @@ def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flig platform_objects[platform].platform_directory_name, ) output["sondes"].update(flight.populate_sonde_instances()) - return output["sondes"] + return output["platforms"], output["sondes"] def iterate_Sonde_method_over_dict_of_Sondes_objects( @@ -368,7 +371,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "create_flight": { "intake": None, "apply": create_and_populate_flight_object, - "output": "sondes", + "output": ["platforms", "sondes"], }, "qc": { "intake": "sondes", From 52a6599e438167f8847416ffa3d555c53b1ee291 Mon Sep 17 00:00:00 2001 From: Geet George Date: Wed, 13 Dec 2023 15:57:22 +0100 Subject: [PATCH 02/12] change sonde module name to processor --- src/halodrops/helper/paths.py | 2 +- src/halodrops/pipeline.py | 2 +- src/halodrops/{sonde.py => processor.py} | 21 +++++++++++++++++++++ tests/test_sonde.py | 3 +-- 4 files changed, 24 insertions(+), 4 deletions(-) rename src/halodrops/{sonde.py => processor.py} (98%) diff --git a/src/halodrops/helper/paths.py b/src/halodrops/helper/paths.py index ee7ccef..40a1ca1 100644 --- a/src/halodrops/helper/paths.py +++ b/src/halodrops/helper/paths.py @@ -5,7 +5,7 @@ import os.path from halodrops.helper import rawreader as rr -from halodrops.sonde import Sonde +from halodrops.processor import Sonde # create logger module_logger = logging.getLogger("halodrops.helper.paths") diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 553c61d..0a84b03 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -1,5 +1,5 @@ from .helper.paths import Platform, Flight -from .sonde import Sonde +from .processor import Sonde import configparser import inspect import os diff --git a/src/halodrops/sonde.py b/src/halodrops/processor.py similarity index 98% rename from src/halodrops/sonde.py rename to src/halodrops/processor.py index 5835ae0..0d55c45 100644 --- a/src/halodrops/sonde.py +++ b/src/halodrops/processor.py @@ -772,3 +772,24 @@ def write_l2(self, l2_dir: str = None): self._interim_l2_ds.to_netcdf(os.path.join(l2_dir, self.l2_filename)) return self + + def add_l2_ds(self, l2_dir: str = None): + """ + Adds the L2 dataset as an attribute to the sonde object. + + Parameters + ---------- + l2_dir : str, optional + The directory to read the L2 file from. The default is the directory of the A-file with '0' replaced by '2'. + + Returns + ------- + self : object + Returns the sonde object with the L2 dataset added as an attribute. + """ + if l2_dir is None: + l2_dir = os.path.dirname(self.afile)[:-1] + "2" + + self.l2_ds = xr.open_dataset(os.path.join(l2_dir, self.l2_filename)) + + return self diff --git a/tests/test_sonde.py b/tests/test_sonde.py index e08fdbf..d0e467f 100644 --- a/tests/test_sonde.py +++ b/tests/test_sonde.py @@ -1,7 +1,7 @@ import pytest import os import xarray as xr -from halodrops.sonde import Sonde +from halodrops.processor import Sonde s_id = "test_this_id" launch_time = "2020-02-02 20:22:02" @@ -11,7 +11,6 @@ def test_Sonde_attrs(): - TestSonde_nolaunchtime = Sonde(s_id) TestSonde_withlaunchtime = Sonde(s_id, launch_time=launch_time) From 19b7a9f74308410499d38c9aced9ec4595c8f26b Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 17:33:26 +0100 Subject: [PATCH 03/12] add `detect_floater` to Sonde object For HALO-(AC)3, some sondes seem to have landed on ice. For example, 3 sondes in the 5-sonde circle flown close to the pole (last flight of the campaign, 20220412). Therefore, they kept transmitting data from the surface (outside of their design to sink, which they couldn't in this case). This causes problems with QC. The function added by this commit should detect if the sonde is a floater. To determine if a sonde is a floater is difficult with an algorithm because "floaters" behave quite differently. I decided to go for a simple check, i.e. if the gpsalt (altitude from gps) and pressure (pres) has not changed (by more than 1 m and 1 hPa, respectively) in a user-decided number of consecutive timesteps (default 3) for measurements at the surface (decided by gpsalt_threshold, default 25m), then the sonde is a floater. This is not perfect, but it should work for most cases. The function adds an attribute `is_floater`(True/False) and if that is True, it will also add another attribute `landing_time`. The latter attribute will be used to restrict measurements from the sonde to the time before it landed, for QC and for inclusion in to L2 and onwards. --- src/halodrops/processor.py | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 0d55c45..39117a5 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -234,6 +234,71 @@ def filter_no_launch_detect(self) -> None: f"The attribute `launch_detect` does not exist for Sonde {self.serial_id}." ) + def detect_floater( + self, + gpsalt_threshold: float = 25, + consecutive_time_steps: int = 3, + skip: bool = False, + ): + """ + Detects if a sonde is a floater. + + Parameters + ---------- + gpsalt_threshold : float, optional + The gpsalt altitude below which the sonde will check for time periods when gpsalt and pres have not changed. Default is 25. + skip : bool, optional + If True, the function will return the object without performing any operations. Default is False. + + Returns + ------- + self + The object itself with the new `is_floater` attribute added based on the function parameters. + """ + if hh.get_bool(skip): + return self + else: + if isinstance(gpsalt_threshold, str): + gpsalt_threshold = float(gpsalt_threshold) + + if hasattr(self, "aspen_ds"): + surface_ds = ( + self.aspen_ds.where( + self.aspen_ds.gpsalt < gpsalt_threshold, drop=True + ) + .sortby("time") + .dropna(dim="time", how="any", subset=["pres", "gpsalt"]) + ) + gpsalt_diff = np.diff(surface_ds.gpsalt) + pressure_diff = np.diff(surface_ds.pres) + gpsalt_diff_below_threshold = ( + np.abs(gpsalt_diff) < 1 + ) # GPS altitude value at surface shouldn't change by more than 1 m + pressure_diff_below_threshold = ( + np.abs(pressure_diff) < 1 + ) # Pressure value at surface shouldn't change by more than 1 hPa + floater = gpsalt_diff_below_threshold & pressure_diff_below_threshold + if np.any(floater): + object.__setattr__(self, "is_floater", True) + for time_index in range(len(floater) - consecutive_time_steps + 1): + if np.all( + floater[time_index : time_index + consecutive_time_steps] + ): + landing_time = surface_ds.time[time_index - 1].values + break + + object.__setattr__(self, "landing_time", landing_time) + print( + f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." + ) + else: + object.__setattr__(self, "is_floater", False) + else: + raise ValueError( + "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." + ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, From bff3ed475eeeac65e9df6bfbaf98cdf6a63979fa Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:01:02 +0100 Subject: [PATCH 04/12] near surface qc takes gpsalt if sonde is floater --- src/halodrops/processor.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 39117a5..d913399 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -399,7 +399,7 @@ def near_surface_coverage( alt_bounds : list, optional The lower and upper bounds of altitude in meters to consider for the calculation. Defaults to [0,1000]. alt_dimension_name : str, optional - The name of the altitude dimension. Defaults to "alt". + The name of the altitude dimension. Defaults to "alt". If the sonde is a floater, this will be set to "gpsalt" regardless of user-provided value. count_threshold : int, optional The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50. add_near_surface_count_attribute : bool, optional @@ -425,6 +425,14 @@ def near_surface_coverage( "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." ) + if not hasattr(self, "is_floater"): + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + + if self.is_floater: + alt_dimension_name = "gpsalt" + if isinstance(alt_bounds, str): alt_bounds = alt_bounds.split(",") alt_bounds = [float(alt_bound) for alt_bound in alt_bounds] From 594f3821aaa4dc40d3264027b32452763dffa6f1 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:26:42 +0100 Subject: [PATCH 05/12] crop aspen_ds if sonde is a floater if a sonde is detected as a floater, this method will create another attr called `cropped_aspen_ds` which will be the same xr.Dataset as `aspen_ds` except the time coordinates will be cut-off where greater than the `landing_time` attr. this is so that later on, QC checks (e.g. profile_fullness) and data levels from L2 onwards can use the cropped_aspen_ds for floaters. --- src/halodrops/processor.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index d913399..0516644 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -299,6 +299,33 @@ def detect_floater( ) return self + def crop_aspen_ds_to_landing_time(self): + """ + Crops the aspen_ds to the time period before landing. + + Parameters + ---------- + None + + Returns + ------- + self + The object itself with the new `cropped_aspen_ds` attribute added if the sonde is a floater. + """ + if hasattr(self, "is_floater"): + if self.is_floater: + if hasattr(self, "landing_time"): + object.__setattr__( + self, + "cropped_aspen_ds", + self.aspen_ds.sel(time=slice(self.landing_time, None)), + ) + else: + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, From 3bf4060aee5968d5660dd2b32ab9b2c56ed7114d Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:29:50 +0100 Subject: [PATCH 06/12] profile_fullness qc uses cropped_aspen if floater --- src/halodrops/processor.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 0516644..cb5e3ae 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -337,6 +337,8 @@ def profile_fullness( ): """ Calculates the profile coverage for a given set of variables, considering their sampling frequency. + If the sonde is a floater, the function will take the `cropped_aspen_ds` attribute + (calculated with the `crop_aspen_ds_to_landing_time` method) as the dataset to calculate the profile coverage. This function assumes that the time_dimension coordinates are spaced over 0.25 seconds, implying a timestamp_frequency of 4 hertz. This is applicable for ASPEN-processed QC and PQC files, @@ -380,7 +382,13 @@ def profile_fullness( fullness_threshold = float(fullness_threshold) for variable, sampling_frequency in variable_dict.items(): - dataset = self.aspen_ds[variable] + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + dataset = self.cropped_aspen_ds[variable] + else: + dataset = self.aspen_ds[variable] + weighed_time_size = len(dataset[time_dimension]) / ( timestamp_frequency / sampling_frequency ) From c80ccdec8f5290171832e9ce89802362e86ed927 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:39:31 +0100 Subject: [PATCH 07/12] if floater, cropped_aspen_ds for L2 onwards if the sonde is a floater, then the data after landing shouldn't go past QC. Therefore, for L2 and onwards, the dataset would be ASPEN cropped to landing time, i.e. the `cropped_aspen_ds` attribute created with the `crop_aspen_ds_to_landing_time` method. This is so that users do not have to deal with problems that usually come around with floaters. Of course, it is important to know when the ASPEN dataset (i.e. L1) has been changed because the sonde was a floater. Therefore, every L2 file will now carry a global attribute called `is_floater` which will indicate if the sonde is a floater. --- src/halodrops/processor.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index cb5e3ae..c441512 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -583,6 +583,30 @@ def filter_qc_fail(self, filter_flags=None): return self + def create_interim_l2_ds(self): + """ + Creates an interim L2 dataset from the aspen_ds or cropped_aspen_ds attribute. + + Parameters + ---------- + None + + Returns + ------- + self : object + Returns the sonde object with the interim L2 dataset added as an attribute. + """ + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + ds = self.cropped_aspen_ds + else: + ds = self.aspen_ds + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + def convert_to_si(self, variables=["rh", "pres", "tdry"], skip=False): """ Converts variables to SI units. @@ -754,6 +778,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), + "is_floater": self.is_floater, "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl", From 345924864bbc0e66df87f7d873f933995db5885a Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:44:58 +0100 Subject: [PATCH 08/12] deal with floaters in QC and L2 in main pipeline --- src/halodrops/pipeline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 0a84b03..278e56f 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -378,6 +378,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", + "detect_floater", "profile_fullness", "near_surface_coverage", "filter_qc_fail", @@ -388,6 +389,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ + "create_interim_l2_ds", "convert_to_si", "get_l2_variables", "add_compression_and_encoding_properties", From 81380b18efd3e525bbbfaeb480e4c3773a978cb3 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:50:14 +0100 Subject: [PATCH 09/12] add floater global attr as str --- src/halodrops/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index c441512..e49adad 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -778,7 +778,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), - "is_floater": self.is_floater, + "is_floater": self.is_floater.__str__(), "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl", From aeab53e2278d8da4d35cb617b7102abc5a65ba7a Mon Sep 17 00:00:00 2001 From: Geet George Date: Mon, 22 Jan 2024 00:53:19 +0100 Subject: [PATCH 10/12] landing_time attr set within if conditional --- src/halodrops/processor.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index e49adad..e5d5314 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -285,12 +285,18 @@ def detect_floater( floater[time_index : time_index + consecutive_time_steps] ): landing_time = surface_ds.time[time_index - 1].values + object.__setattr__(self, "landing_time", landing_time) + print( + f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." + ) break - - object.__setattr__(self, "landing_time", landing_time) - print( - f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." - ) + if not hasattr(self, "landing_time"): + print( + f"{self.serial_id}: Floater detected! However, the landing time could not be estimated. Therefore setting landing time as {surface_ds.time[0].values}" + ) + object.__setattr__( + self, "landing_time", surface_ds.time[0].values + ) else: object.__setattr__(self, "is_floater", False) else: @@ -331,7 +337,7 @@ def profile_fullness( variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, time_dimension="time", timestamp_frequency=4, - fullness_threshold=0.8, + fullness_threshold=0.75, add_fullness_fraction_attribute=False, skip=False, ): From a5c0c46ac76b67c6f8d88d543d5a40cdad15412e Mon Sep 17 00:00:00 2001 From: Geet George Date: Mon, 22 Jan 2024 01:06:14 +0100 Subject: [PATCH 11/12] correct attribute setting for l2_ds --- src/halodrops/processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index e5d5314..78c1e10 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -929,6 +929,8 @@ def add_l2_ds(self, l2_dir: str = None): if l2_dir is None: l2_dir = os.path.dirname(self.afile)[:-1] + "2" - self.l2_ds = xr.open_dataset(os.path.join(l2_dir, self.l2_filename)) + object.__setattr__( + self, "l2_ds", xr.open_dataset(os.path.join(l2_dir, self.l2_filename)) + ) return self From e295fe4385e6dce602e409f5d40379a82f5db5c9 Mon Sep 17 00:00:00 2001 From: Geet George Date: Wed, 19 Jun 2024 23:59:04 +0200 Subject: [PATCH 12/12] remove api.qc call in init (closes #108) --- src/halodrops/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/halodrops/__init__.py b/src/halodrops/__init__.py index ee5c92a..7454dfe 100644 --- a/src/halodrops/__init__.py +++ b/src/halodrops/__init__.py @@ -35,7 +35,6 @@ def main(): import argparse import halodrops - import halodrops.api.qc as qc parser = argparse.ArgumentParser("Arguments")