From 19b7a9f74308410499d38c9aced9ec4595c8f26b Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 17:33:26 +0100 Subject: [PATCH 1/7] add `detect_floater` to Sonde object For HALO-(AC)3, some sondes seem to have landed on ice, for example 3 sondes in the 5-sonde circle flown close to the pole (last flight of the campaign, 20220412). Sondes are designed to sink, but these could not, so they kept transmitting data from the surface. This causes problems with QC. The function added by this commit should detect if the sonde is a floater. Determining algorithmically whether a sonde is a floater is difficult because "floaters" behave quite differently. I decided to go for a simple check, i.e. if the gpsalt (altitude from gps) and pressure (pres) have not changed (by more than 1 m and 1 hPa, respectively) in a user-decided number of consecutive timesteps (default 3) for measurements at the surface (decided by gpsalt_threshold, default 25m), then the sonde is a floater. This is not perfect, but it should work for most cases. The function adds an attribute `is_floater` (True/False) and, if that is True, it will also add another attribute `landing_time`. The latter attribute will be used to restrict measurements from the sonde to the time before it landed, for QC and for inclusion in L2 and onwards. --- src/halodrops/processor.py | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 0d55c45..39117a5 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -234,6 +234,71 @@ def filter_no_launch_detect(self) -> None: f"The attribute `launch_detect` does not exist for Sonde {self.serial_id}." ) + def detect_floater( + self, + gpsalt_threshold: float = 25, + consecutive_time_steps: int = 3, + skip: bool = False, + ): + """ + Detects if a sonde is a floater. 
+ + Parameters + ---------- + gpsalt_threshold : float, optional + The gpsalt altitude below which the sonde will check for time periods when gpsalt and pres have not changed. Default is 25. + skip : bool, optional + If True, the function will return the object without performing any operations. Default is False. + + Returns + ------- + self + The object itself with the new `is_floater` attribute added based on the function parameters. + """ + if hh.get_bool(skip): + return self + else: + if isinstance(gpsalt_threshold, str): + gpsalt_threshold = float(gpsalt_threshold) + + if hasattr(self, "aspen_ds"): + surface_ds = ( + self.aspen_ds.where( + self.aspen_ds.gpsalt < gpsalt_threshold, drop=True + ) + .sortby("time") + .dropna(dim="time", how="any", subset=["pres", "gpsalt"]) + ) + gpsalt_diff = np.diff(surface_ds.gpsalt) + pressure_diff = np.diff(surface_ds.pres) + gpsalt_diff_below_threshold = ( + np.abs(gpsalt_diff) < 1 + ) # GPS altitude value at surface shouldn't change by more than 1 m + pressure_diff_below_threshold = ( + np.abs(pressure_diff) < 1 + ) # Pressure value at surface shouldn't change by more than 1 hPa + floater = gpsalt_diff_below_threshold & pressure_diff_below_threshold + if np.any(floater): + object.__setattr__(self, "is_floater", True) + for time_index in range(len(floater) - consecutive_time_steps + 1): + if np.all( + floater[time_index : time_index + consecutive_time_steps] + ): + landing_time = surface_ds.time[time_index - 1].values + break + + object.__setattr__(self, "landing_time", landing_time) + print( + f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." + ) + else: + object.__setattr__(self, "is_floater", False) + else: + raise ValueError( + "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." 
+ ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, From bff3ed475eeeac65e9df6bfbaf98cdf6a63979fa Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:01:02 +0100 Subject: [PATCH 2/7] near surface qc takes gpsalt if sonde is floater --- src/halodrops/processor.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 39117a5..d913399 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -399,7 +399,7 @@ def near_surface_coverage( alt_bounds : list, optional The lower and upper bounds of altitude in meters to consider for the calculation. Defaults to [0,1000]. alt_dimension_name : str, optional - The name of the altitude dimension. Defaults to "alt". + The name of the altitude dimension. Defaults to "alt". If the sonde is a floater, this will be set to "gpsalt" regardless of user-provided value. count_threshold : int, optional The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50. add_near_surface_count_attribute : bool, optional @@ -425,6 +425,14 @@ def near_surface_coverage( "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." ) + if not hasattr(self, "is_floater"): + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." 
+ ) + + if self.is_floater: + alt_dimension_name = "gpsalt" + if isinstance(alt_bounds, str): alt_bounds = alt_bounds.split(",") alt_bounds = [float(alt_bound) for alt_bound in alt_bounds] From 594f3821aaa4dc40d3264027b32452763dffa6f1 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:26:42 +0100 Subject: [PATCH 3/7] crop aspen_ds if sonde is a floater if a sonde is detected as a floater, this method will create another attr called `cropped_aspen_ds` which will be the same xr.Dataset as `aspen_ds` except the time coordinates will be cut-off where greater than the `landing_time` attr. this is so that later on, QC checks (e.g. profile_fullness) and data levels from L2 onwards can use the cropped_aspen_ds for floaters. --- src/halodrops/processor.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index d913399..0516644 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -299,6 +299,33 @@ def detect_floater( ) return self + def crop_aspen_ds_to_landing_time(self): + """ + Crops the aspen_ds to the time period before landing. + + Parameters + ---------- + None + + Returns + ------- + self + The object itself with the new `cropped_aspen_ds` attribute added if the sonde is a floater. + """ + if hasattr(self, "is_floater"): + if self.is_floater: + if hasattr(self, "landing_time"): + object.__setattr__( + self, + "cropped_aspen_ds", + self.aspen_ds.sel(time=slice(self.landing_time, None)), + ) + else: + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." 
+ ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, From 3bf4060aee5968d5660dd2b32ab9b2c56ed7114d Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:29:50 +0100 Subject: [PATCH 4/7] profile_fullness qc uses cropped_aspen if floater --- src/halodrops/processor.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 0516644..cb5e3ae 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -337,6 +337,8 @@ def profile_fullness( ): """ Calculates the profile coverage for a given set of variables, considering their sampling frequency. + If the sonde is a floater, the function will take the `cropped_aspen_ds` attribute + (calculated with the `crop_aspen_ds_to_landing_time` method) as the dataset to calculate the profile coverage. This function assumes that the time_dimension coordinates are spaced over 0.25 seconds, implying a timestamp_frequency of 4 hertz. This is applicable for ASPEN-processed QC and PQC files, @@ -380,7 +382,13 @@ def profile_fullness( fullness_threshold = float(fullness_threshold) for variable, sampling_frequency in variable_dict.items(): - dataset = self.aspen_ds[variable] + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + dataset = self.cropped_aspen_ds[variable] + else: + dataset = self.aspen_ds[variable] + weighed_time_size = len(dataset[time_dimension]) / ( timestamp_frequency / sampling_frequency ) From c80ccdec8f5290171832e9ce89802362e86ed927 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:39:31 +0100 Subject: [PATCH 5/7] if floater, cropped_aspen_ds for L2 onwards if the sonde is a floater, then the data after landing shouldn't go past QC. Therefore, for L2 and onwards, the dataset would be ASPEN cropped to landing time, i.e. 
the `cropped_aspen_ds` attribute created with the `crop_aspen_ds_to_landing_time` method. This is so that users do not have to deal with the problems that usually come with floaters. Of course, it is important to know when the ASPEN dataset (i.e. L1) has been changed because the sonde was a floater. Therefore, every L2 file will now carry a global attribute called `is_floater`, which indicates whether the sonde is a floater. --- src/halodrops/processor.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index cb5e3ae..c441512 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -583,6 +583,30 @@ def filter_qc_fail(self, filter_flags=None): return self + def create_interim_l2_ds(self): + """ + Creates an interim L2 dataset from the aspen_ds or cropped_aspen_ds attribute. + + Parameters + ---------- + None + + Returns + ------- + self : object + Returns the sonde object with the interim L2 dataset added as an attribute. + """ + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + ds = self.cropped_aspen_ds + else: + ds = self.aspen_ds + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + def convert_to_si(self, variables=["rh", "pres", "tdry"], skip=False): """ Converts variables to SI units. 
@@ -754,6 +778,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), + "is_floater": self.is_floater, "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl", From 345924864bbc0e66df87f7d873f933995db5885a Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:44:58 +0100 Subject: [PATCH 6/7] deal with floaters in QC and L2 in main pipeline --- src/halodrops/pipeline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 0a84b03..278e56f 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -378,6 +378,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", + "detect_floater", "profile_fullness", "near_surface_coverage", "filter_qc_fail", @@ -388,6 +389,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ + "create_interim_l2_ds", "convert_to_si", "get_l2_variables", "add_compression_and_encoding_properties", From 81380b18efd3e525bbbfaeb480e4c3773a978cb3 Mon Sep 17 00:00:00 2001 From: Geet George Date: Tue, 19 Dec 2023 21:50:14 +0100 Subject: [PATCH 7/7] add floater global attr as str --- src/halodrops/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index c441512..e49adad 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -778,7 +778,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), - "is_floater": self.is_floater, + "is_floater": 
self.is_floater.__str__(), "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl",