diff --git a/src/halodrops/__init__.py b/src/halodrops/__init__.py index ee5c92a..7454dfe 100644 --- a/src/halodrops/__init__.py +++ b/src/halodrops/__init__.py @@ -35,7 +35,6 @@ def main(): import argparse import halodrops - import halodrops.api.qc as qc parser = argparse.ArgumentParser("Arguments") diff --git a/src/halodrops/helper/paths.py b/src/halodrops/helper/paths.py index ee7ccef..40a1ca1 100644 --- a/src/halodrops/helper/paths.py +++ b/src/halodrops/helper/paths.py @@ -5,7 +5,7 @@ import os.path from halodrops.helper import rawreader as rr -from halodrops.sonde import Sonde +from halodrops.processor import Sonde # create logger module_logger = logging.getLogger("halodrops.helper.paths") diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py index 5800ec4..278e56f 100644 --- a/src/halodrops/pipeline.py +++ b/src/halodrops/pipeline.py @@ -1,5 +1,5 @@ from .helper.paths import Platform, Flight -from .sonde import Sonde +from .processor import Sonde import configparser import inspect import os @@ -196,7 +196,9 @@ def get_platforms(config): return platform_objects -def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flight: +def create_and_populate_flight_object( + config: configparser.ConfigParser, +) -> (dict[Platform], dict[Sonde]): """ Creates a Flight object and populates it with A-files. @@ -210,8 +212,9 @@ def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flig Flight A Flight object. """ - platform_objects = get_platforms(config) output = {} + platform_objects = get_platforms(config) + output["platforms"] = platform_objects output["sondes"] = {} for platform in platform_objects: for flight_id in platform_objects[platform].flight_ids: @@ -222,7 +225,7 @@ def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flig platform_objects[platform].platform_directory_name, ) output["sondes"].update(flight.populate_sonde_instances()) - return output["sondes"] + return output["platforms"], output["sondes"] def iterate_Sonde_method_over_dict_of_Sondes_objects( @@ -368,13 +371,14 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "create_flight": { "intake": None, "apply": create_and_populate_flight_object, - "output": "sondes", + "output": ["platforms", "sondes"], }, "qc": { "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ "filter_no_launch_detect", + "detect_floater", "profile_fullness", "near_surface_coverage", "filter_qc_fail", @@ -385,6 +389,7 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser): "intake": "sondes", "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, "functions": [ + "create_interim_l2_ds", "convert_to_si", "get_l2_variables", "add_compression_and_encoding_properties", diff --git a/src/halodrops/sonde.py b/src/halodrops/processor.py similarity index 83% rename from src/halodrops/sonde.py rename to src/halodrops/processor.py index 5835ae0..78c1e10 100644 --- a/src/halodrops/sonde.py +++ b/src/halodrops/processor.py @@ -234,17 +234,117 @@ def filter_no_launch_detect(self) -> None: f"The attribute `launch_detect` does not exist for Sonde {self.serial_id}." ) + def detect_floater( + self, + gpsalt_threshold: float = 25, + consecutive_time_steps: int = 3, + skip: bool = False, + ): + """ + Detects if a sonde is a floater. + + Parameters + ---------- + gpsalt_threshold : float, optional + The gpsalt altitude below which the sonde will check for time periods when gpsalt and pres have not changed. Default is 25. + skip : bool, optional + If True, the function will return the object without performing any operations. Default is False. + + Returns + ------- + self + The object itself with the new `is_floater` attribute added based on the function parameters. + """ + if hh.get_bool(skip): + return self + else: + if isinstance(gpsalt_threshold, str): + gpsalt_threshold = float(gpsalt_threshold) + + if hasattr(self, "aspen_ds"): + surface_ds = ( + self.aspen_ds.where( + self.aspen_ds.gpsalt < gpsalt_threshold, drop=True + ) + .sortby("time") + .dropna(dim="time", how="any", subset=["pres", "gpsalt"]) + ) + gpsalt_diff = np.diff(surface_ds.gpsalt) + pressure_diff = np.diff(surface_ds.pres) + gpsalt_diff_below_threshold = ( + np.abs(gpsalt_diff) < 1 + ) # GPS altitude value at surface shouldn't change by more than 1 m + pressure_diff_below_threshold = ( + np.abs(pressure_diff) < 1 + ) # Pressure value at surface shouldn't change by more than 1 hPa + floater = gpsalt_diff_below_threshold & pressure_diff_below_threshold + if np.any(floater): + object.__setattr__(self, "is_floater", True) + for time_index in range(len(floater) - consecutive_time_steps + 1): + if np.all( + floater[time_index : time_index + consecutive_time_steps] + ): + landing_time = surface_ds.time[time_index - 1].values + object.__setattr__(self, "landing_time", landing_time) + print( + f"{self.serial_id}: Floater detected! The landing time is estimated as {landing_time}." + ) + break + if not hasattr(self, "landing_time"): + print( + f"{self.serial_id}: Floater detected! However, the landing time could not be estimated. Therefore setting landing time as {surface_ds.time[0].values}" + ) + object.__setattr__( + self, "landing_time", surface_ds.time[0].values + ) + else: + object.__setattr__(self, "is_floater", False) + else: + raise ValueError( + "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." + ) + return self + + def crop_aspen_ds_to_landing_time(self): + """ + Crops the aspen_ds to the time period before landing. + + Parameters + ---------- + None + + Returns + ------- + self + The object itself with the new `cropped_aspen_ds` attribute added if the sonde is a floater. + """ + if hasattr(self, "is_floater"): + if self.is_floater: + if hasattr(self, "landing_time"): + object.__setattr__( + self, + "cropped_aspen_ds", + self.aspen_ds.sel(time=slice(self.landing_time, None)), + ) + else: + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + return self + def profile_fullness( self, variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2}, time_dimension="time", timestamp_frequency=4, - fullness_threshold=0.8, + fullness_threshold=0.75, add_fullness_fraction_attribute=False, skip=False, ): """ Calculates the profile coverage for a given set of variables, considering their sampling frequency. + If the sonde is a floater, the function will take the `cropped_aspen_ds` attribute + (calculated with the `crop_aspen_ds_to_landing_time` method) as the dataset to calculate the profile coverage. This function assumes that the time_dimension coordinates are spaced over 0.25 seconds, implying a timestamp_frequency of 4 hertz. This is applicable for ASPEN-processed QC and PQC files, @@ -288,7 +388,13 @@ def profile_fullness( fullness_threshold = float(fullness_threshold) for variable, sampling_frequency in variable_dict.items(): - dataset = self.aspen_ds[variable] + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + dataset = self.cropped_aspen_ds[variable] + else: + dataset = self.aspen_ds[variable] + weighed_time_size = len(dataset[time_dimension]) / ( timestamp_frequency / sampling_frequency ) @@ -334,7 +440,7 @@ def near_surface_coverage( alt_bounds : list, optional The lower and upper bounds of altitude in meters to consider for the calculation. Defaults to [0,1000]. alt_dimension_name : str, optional - The name of the altitude dimension. Defaults to "alt". + The name of the altitude dimension. Defaults to "alt". If the sonde is a floater, this will be set to "gpsalt" regardless of user-provided value. count_threshold : int, optional The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50. add_near_surface_count_attribute : bool, optional @@ -360,6 +466,14 @@ def near_surface_coverage( "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." ) + if not hasattr(self, "is_floater"): + raise ValueError( + "The attribute `is_floater` does not exist. Please run `detect_floater` method first." + ) + + if self.is_floater: + alt_dimension_name = "gpsalt" + if isinstance(alt_bounds, str): alt_bounds = alt_bounds.split(",") alt_bounds = [float(alt_bound) for alt_bound in alt_bounds] @@ -475,6 +589,30 @@ def filter_qc_fail(self, filter_flags=None): return self + def create_interim_l2_ds(self): + """ + Creates an interim L2 dataset from the aspen_ds or cropped_aspen_ds attribute. + + Parameters + ---------- + None + + Returns + ------- + self : object + Returns the sonde object with the interim L2 dataset added as an attribute. + """ + if self.is_floater: + if not hasattr(self, "cropped_aspen_ds"): + self.crop_aspen_ds_to_landing_time() + ds = self.cropped_aspen_ds + else: + ds = self.aspen_ds + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + def convert_to_si(self, variables=["rh", "pres", "tdry"], skip=False): """ Converts variables to SI units. @@ -646,6 +784,7 @@ def get_other_global_attributes(self): "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) if hasattr(self.aspen_ds, "launch_time") else str(self.aspen_ds.base_time.values), + "is_floater": self.is_floater.__str__(), "sonde_serial_ID": self.serial_id, "author": "Geet George", "author_email": "g.george@tudelft.nl", @@ -772,3 +911,26 @@ def write_l2(self, l2_dir: str = None): self._interim_l2_ds.to_netcdf(os.path.join(l2_dir, self.l2_filename)) return self + + def add_l2_ds(self, l2_dir: str = None): + """ + Adds the L2 dataset as an attribute to the sonde object. + + Parameters + ---------- + l2_dir : str, optional + The directory to read the L2 file from. The default is the directory of the A-file with '0' replaced by '2'. + + Returns + ------- + self : object + Returns the sonde object with the L2 dataset added as an attribute. + """ + if l2_dir is None: + l2_dir = os.path.dirname(self.afile)[:-1] + "2" + + object.__setattr__( + self, "l2_ds", xr.open_dataset(os.path.join(l2_dir, self.l2_filename)) + ) + + return self diff --git a/tests/test_sonde.py b/tests/test_sonde.py index e08fdbf..d0e467f 100644 --- a/tests/test_sonde.py +++ b/tests/test_sonde.py @@ -1,7 +1,7 @@ import pytest import os import xarray as xr -from halodrops.sonde import Sonde +from halodrops.processor import Sonde s_id = "test_this_id" launch_time = "2020-02-02 20:22:02" @@ -11,7 +11,6 @@ def test_Sonde_attrs(): - TestSonde_nolaunchtime = Sonde(s_id) TestSonde_withlaunchtime = Sonde(s_id, launch_time=launch_time)