diff --git a/README.md b/README.md index 8087b48..943d6d6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ It's a Python wrapper around [MetopDatasets.jl](https://github.com/eumetsat/Meto [MetopDatasets.jl](https://github.com/eumetsat/MetopDatasets.jl) is a package for reading products from the [METOP satellites](https://www.eumetsat.int/our-satellites/metop-series) using the native binary format specified for each product. The METOP satellites are part of the EUMETSAT-POLAR-SYSTEM (EPS) and have produced near real-time, global weather and climate observation since 2007. Learn more METOP and the data access on [EUMETSATs user-portal](https://user.eumetsat.int/dashboard). ## Status -Metopdatasetpy is under development and is not ready for use yet. +MetopPy is under development and is not ready for use yet. ## Copyright and License This code is licensed under MIT license. See file LICENSE for details on the usage and distribution terms. @@ -56,7 +56,7 @@ reduced_data_files = [f for f in reduced_data_folder.iterdir() if f.is_file()] test_file_name = next((s for s in reduced_data_files if s.name.startswith("ASCA_SZO"))) test_file_path = reduced_data_folder / test_file_name -ds = metop_reader.load_dataset(file_path=str(test_file_path)) +ds = metop_reader.open_dataset(file_path=str(test_file_path), maskingvalue = float("nan")) ``` 2. Check keys @@ -86,7 +86,7 @@ print(ds['latitude']) ``` latitude (42 × 10) - Datatype: Union{Missing, Float64} (Int32) + Datatype: Float64 (Int32) Dimensions: xtrack × atrack Attributes: description = Latitude (-90 to 90 deg) @@ -96,18 +96,18 @@ latitude (42 × 10) -4. Read variable +4. 
Read variable as nested Python list ```python -from juliacall import Main -# Convert CFVariable to a full Julia Array -latitude_julia = Main.Array(ds['latitude']) # preserves the 2D shape +# Load CFVariable as a full Julia Array +latitude_julia = metop_reader.as_array(ds['latitude']) # preserves the 2D shape +latitude_shape = metop_reader.shape(latitude_julia) # Convert to nested Python list latitude_list = [ - [latitude_julia[i, j] for j in range(latitude_julia.size[1])] - for i in range(latitude_julia.size[0]) + [latitude_julia[i, j] for j in range(latitude_shape[1])] + for i in range(latitude_shape[0]) ] # Print first 5x5 elements @@ -128,7 +128,48 @@ for row in latitude_list[:5]: +5. Read a slice of the variable (This is a good way to limit memory use for large variables) +```python +longitude_slice = metop_reader.as_array(ds['longitude'][10:14,0:2]) +print(metop_reader.shape(longitude_slice)) +``` +
+ +Output of the print + +``` +(4, 2) +``` + +
+ +6. Convert Julia array to numpy (requires that numpy is also installed) + +```python +import numpy as np +# Convert the Julia array to a numpy array +longitude_slice_np = np.array(longitude_slice) # "copy = None" can be used to reduce memory +print(longitude_slice_np) +``` +
+ +Output of the print + +``` +[[233.685948 233.033544] + [233.276564 232.620004] + [232.858097 232.197423] + [232.430274 231.765534]] +``` + +
+ +7. Close dataset and free file lock + +```python +metop_reader.close_dataset(ds) +``` ## Development diff --git a/metoppy/metopreader.py b/metoppy/metopreader.py index 2c84fad..39b2fef 100644 --- a/metoppy/metopreader.py +++ b/metoppy/metopreader.py @@ -25,8 +25,7 @@ def __init__(self): # Import Julia package installed via juliapkg.json Main.seval("import MetopDatasets") # Store module and commonly used functions - self._keys = Main.MetopDatasets.keys - self._load_dataset = Main.MetopDatasets.MetopDataset + self._open_dataset = Main.MetopDatasets.MetopDataset self._get_test_data_artifact = Main.MetopDatasets.get_test_data_artifact def get_keys(self, dataset): @@ -43,27 +42,79 @@ def get_keys(self, dataset): list The list of keys available in the dataset. """ - return self._keys(dataset) + return Main.keys(dataset) + + def as_array(self, variable): + """ + Load the variable as a Julia array. + + Parameters + ---------- + variable : Julia object + A variable from a MetopDatasets.MetopDataset object. + + Returns + ------- + Julia array + The data from the variable loaded as an array. + """ + return Main.Array(variable) + + def shape(self, variable_or_j_array): + """ + Get the shape of a Julia array or variable. + + Parameters + ---------- + variable_or_j_array : Julia object + A variable from a MetopDatasets.MetopDataset object or a Julia Array. + + Returns + ------- + Tuple of ints + The shape of the variable + """ + return Main.size(variable_or_j_array) - def load_dataset(self, file_path: str): + def open_dataset(self, file_path: str, maskingvalue = Main.missing): """ - Load a dataset from a record path using MetopDatasets.MetopDataset. + Open a dataset from a record path using MetopDatasets.MetopDataset. Parameters ---------- file_path : str Path to the dataset record. + maskingvalue + The masking values are used to replace missing observations. Defaults to Julia Missing type. + A recommended alternative is float("nan") which increases performance for float data. 
+ Returns ------- Julia object - A MetopDataset object loaded from the provided path. + A MetopDataset object opened from the provided path. """ try: - return self._load_dataset(file_path) + return self._open_dataset(file_path, maskingvalue = maskingvalue) except Exception as e: - raise RuntimeError(f"Failed to load dataset: {file_path}") from e + raise RuntimeError(f"Failed to open dataset: {file_path}") from e + + def close_dataset(self, dataset): + """ + Close a dataset and free the file lock created by MetopReader.open_dataset + Parameters + ---------- + dataset : Julia object + A dataset object created by MetopDatasets.MetopDataset. + + Returns + ------- + None + """ + Main.close(dataset) + return None + def get_test_data_artifact(self): """ Retrieve the test dataset artifact from MetopDatasets. diff --git a/metoppy/tests/test_get_test_data_artifact.py b/metoppy/tests/test_get_test_data_artifact.py index 733e78e..58ceb66 100644 --- a/metoppy/tests/test_get_test_data_artifact.py +++ b/metoppy/tests/test_get_test_data_artifact.py @@ -12,7 +12,6 @@ # TODO: Convert to pytest from pathlib import Path -from juliacall import Main from metoppy.metopreader import MetopReader metop_reader = MetopReader() @@ -23,7 +22,7 @@ test_file_name = next((s for s in reduced_data_files if s.name.startswith("ASCA_SZO"))) test_file_path = reduced_data_folder / test_file_name -ds = metop_reader.load_dataset(file_path=str(test_file_path)) +ds = metop_reader.open_dataset(file_path=str(test_file_path), maskingvalue = float("nan")) keys = metop_reader.get_keys(ds) print(list(keys)) @@ -31,12 +30,13 @@ print(ds["latitude"]) # Convert CFVariable to a full Julia Array -latitude_julia = Main.Array(ds["latitude"]) # preserves the 2D shape +latitude_julia = metop_reader.as_array(ds['latitude']) # preserves the 2D shape +latitude_shape = metop_reader.shape(latitude_julia) # Convert to nested Python list latitude_list = [ - [latitude_julia[i, j] for j in range(latitude_julia.size[1])] - for i in 
range(latitude_julia.size[0]) + [latitude_julia[i, j] for j in range(latitude_shape[1])] + for i in range(latitude_shape[0]) ] # Print first 5x5 elements