diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 0a84b03..278e56f 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -378,6 +378,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", + "detect_floater", "profile_fullness", "near_surface_coverage", "filter_qc_fail", @@ -388,6 +389,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ + "create_interim_l2_ds", "convert_to_si", "get_l2_variables", "add_compression_and_encoding_properties", diff --git a/src/halodrops/processor.py b/src/halodrops/processor.py index 0d55c45..e49adad 100644 --- a/src/halodrops/processor.py +++ b/src/halodrops/processor.py @@ -234,6 +234,98 @@ def filter_no_launch_detect(self) -> None: f"The attribute `launch_detect` does not exist for Sonde {self.serial_id}." ) + def detect_floater( + self, + gpsalt_threshold: float = 25, + consecutive_time_steps: int = 3, + skip: bool = False, + ): + """ + Detects if a sonde is a floater. + + Parameters + ---------- + gpsalt_threshold : float, optional + The gpsalt altitude below which the sonde will check for time periods when gpsalt and pres have not changed. Default is 25. + skip : bool, optional + If True, the function will return the object without performing any operations. Default is False. + + Returns + ------- + self + The object itself with the new `is_floater` attribute added based on the function parameters. + """ + if hh.get_bool(skip): + return self + else: + if isinstance(gpsalt_threshold, str): + gpsalt_threshold = float(gpsalt_threshold) + + if hasattr(self, "aspen_ds"): + surface_ds = ( + self.aspen_ds.where( + self.aspen_ds.gpsalt < gpsalt_threshold, drop=True + ) + .sortby("time") + .dropna(dim="time", how="any", subset=["pres", "gpsalt"]) + ) + gpsalt_diff = np.diff(surface_ds.gpsalt) + pressure_diff = np.diff(surface_ds.pres) + gpsalt_diff_below_threshold = ( + np.abs(gpsalt_diff) < 1 + ) # GPS altitude value at surface shouldn't change by more than 1 m + pressure_diff_below_threshold = ( + np.abs(pressure_diff) < 1 + ) # Pressure value at surface shouldn't change by more than 1 hPa + floater = gpsalt_diff_below_threshold & pressure_diff_below_threshold + if np.any(floater): + object.__setattr__(self, "is_floater", True) + for time_index in range(len(floater) - consecutive_time_steps + 1): + if np.all( + floater[time_index : time_index + consecutive_time_steps] + ): + landing_time = surface_ds.time[time_index - 1].values + break + + object.__setattr__(self, "landing_time", landing_time) + print( + f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." + ) + else: + object.__setattr__(self, "is_floater", False) + else: + raise ValueError( + "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." + ) + return self + + def crop_aspen_ds_to_landing_time(self): + """ + Crops the aspen_ds to the time period before landing. + + Parameters + ---------- + None + + Returns + ------- + self + The object itself with the new `cropped_aspen_ds` attribute added if the sonde is a floater. + """ + if hasattr(self, "is_floater"): + if self.is_floater: + if hasattr(self, "landing_time"): + object.__setattr__( + self, + "cropped_aspen_ds", + self.aspen_ds.sel(time=slice(self.landing_time, None)), + ) + else: + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, @@ -245,6 +337,8 @@ def profile_fullness( ): """ Calculates the profile coverage for a given set of variables, considering their sampling frequency. + If the sonde is a floater, the function will take the `cropped_aspen_ds` attribute + (calculated with the `crop_aspen_ds_to_landing_time` method) as the dataset to calculate the profile coverage. This function assumes that the time_dimension coordinates are spaced over 0.25 seconds, implying a timestamp_frequency of 4 hertz. This is applicable for ASPEN-processed QC and PQC files, @@ -288,7 +382,13 @@ def profile_fullness( fullness_threshold = float(fullness_threshold) for variable, sampling_frequency in variable_dict.items(): - dataset = self.aspen_ds[variable] + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + dataset = self.cropped_aspen_ds[variable] + else: + dataset = self.aspen_ds[variable] + weighed_time_size = len(dataset[time_dimension]) / ( timestamp_frequency / sampling_frequency ) @@ -334,7 +434,7 @@ def near_surface_coverage( alt_bounds : list, optional The lower and upper bounds of altitude in meters to consider for the calculation. Defaults to [0,1000]. alt_dimension_name : str, optional - The name of the altitude dimension. Defaults to "alt". + The name of the altitude dimension. Defaults to "alt". If the sonde is a floater, this will be set to "gpsalt" regardless of user-provided value. count_threshold : int, optional The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50. add_near_surface_count_attribute : bool, optional @@ -360,6 +460,14 @@ def near_surface_coverage( "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." ) + if not hasattr(self, "is_floater"): + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + + if self.is_floater: + alt_dimension_name = "gpsalt" + if isinstance(alt_bounds, str): alt_bounds = alt_bounds.split(",") alt_bounds = [float(alt_bound) for alt_bound in alt_bounds] @@ -475,6 +583,30 @@ def filter_qc_fail(self, filter_flags=None): return self + def create_interim_l2_ds(self): + """ + Creates an interim L2 dataset from the aspen_ds or cropped_aspen_ds attribute. + + Parameters + ---------- + None + + Returns + ------- + self : object + Returns the sonde object with the interim L2 dataset added as an attribute. + """ + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + ds = self.cropped_aspen_ds + else: + ds = self.aspen_ds + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + def convert_to_si(self, variables=["rh", "pres", "tdry"], skip=False): """ Converts variables to SI units. @@ -646,6 +778,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), + "is_floater": self.is_floater.__str__(), "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl",