From 256bfe08d9c012d229e253e6884de3870535d479 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 6 Dec 2023 17:29:19 +0100 Subject: [PATCH 01/12] Set up test --- tests/test_to_imas.py | 32 +++++++++++++++++++ ..._xarray_interface.py => test_to_xarray.py} | 0 2 files changed, 32 insertions(+) create mode 100644 tests/test_to_imas.py rename tests/{test_xarray_interface.py => test_to_xarray.py} (100%) diff --git a/tests/test_to_imas.py b/tests/test_to_imas.py new file mode 100644 index 0000000..ce54bfc --- /dev/null +++ b/tests/test_to_imas.py @@ -0,0 +1,32 @@ +from imas2xarray import H5Handle, Variable, to_imas +import pytest + + +@pytest.fixture +def dataset(): + ds = 123 + return ds + + + +def to_imas(file: str | Path, arr: xr.Dataset, ids: str, variables: Collection[str]): + + +def test_to_imas(dataset, tmpdir): + # copy data to tempdir + + h = H5Handle(tmpdir / 'my_data') + + ids = 'core_profiles' + variables = 'zeff', 't_e' + + path = (h.path / ids).with_suffix('h5') + assert path.exists() + mtime1 = os.stat(path).st_mtime + + h.to_imas(xarray, ids=ids, variables=variables) + + assert (h.path / ids).with_suffix('h5').exists() + mtime2 = os.stat(path).st_mtime + assert mtime2 != mtime1 + diff --git a/tests/test_xarray_interface.py b/tests/test_to_xarray.py similarity index 100% rename from tests/test_xarray_interface.py rename to tests/test_to_xarray.py From 7bf08d7e84941d00300bab5662640f8f737c5290 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 6 Dec 2023 17:53:49 +0100 Subject: [PATCH 02/12] Set up boilerplate --- src/imas2xarray/_io.py | 91 +++++++++++++++++++++++++++----------- src/imas2xarray/_lookup.py | 4 +- tests/test_to_imas.py | 39 ++++++++-------- 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index 0f68b5a..21afe54 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -5,7 +5,7 @@ from __future__ import annotations from pathlib import Path -from typing import 
TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Collection import h5py import numpy as np @@ -61,7 +61,9 @@ def _var_path_to_hdf5_key_and_slices(path: str) -> tuple[str, tuple[slice | int, return key, tuple(slices) -def to_xarray(path: str | Path, *, ids: str, variables: None | Sequence[str] = None): +def to_xarray( + path: str | Path, *, ids: str, variables: None | Collection[str] = None +) -> xr.Dataset: """Load IDS from given path to IMAS data into an xarray dataset. IMAS data must be in HDF5 format. @@ -78,15 +80,39 @@ def to_xarray(path: str | Path, *, ids: str, variables: None | Sequence[str] = N Returns ------- - ds : xr.Dataset + dataset : xr.Dataset Xarray dataset with all specified variables """ h = H5Handle(path) if variables: - return h.get_variables(variables=variables) + return h.get_variables(variables=variables, ids=ids) else: - return h.get_all_variables() + return h.get_all_variables(ids=ids) + + +def to_imas( + path: str | Path, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None +): + """Write variables in xarray dataset back to IMAS data at given path. + + Update only, IMAS data must be in HDF5 format. + + Parameters + ---------- + path : str | Path + Path to the data + dataset : xr.Dataset + Input dataset + ids : str + The IDS to write to (i.e. 'core_profiles') + variables : Collection[str] + List of variables to write back. If None, attempt to write back + all variables known to `imas2xarray` + """ + h = H5Handle(path) + + h.set_variables(dataset, ids=ids, variables=variables) class H5Handle: @@ -112,9 +138,10 @@ def open_ids(self, ids: str = 'core_profiles') -> h5py.File: def get_all_variables( self, - extra_variables: None | Sequence[IDSVariableModel] = None, + *, + ids: str, + extra_variables: None | Collection[IDSVariableModel] = None, squash: bool = True, - ids: str = 'core_profiles', **kwargs, ) -> xr.Dataset: """Get all known variables from selected ids from the dataset. 
@@ -124,7 +151,9 @@ def get_all_variables( Parameters ---------- - extra_variables : Sequence[IDSVariableModel] + ids : str + The IDS to read from (e.g. 'core_profiles') + extra_variables : Collection[IDSVariableModel] Extra variables to load in addition to the ones known through the config squash : bool Squash placeholder variables @@ -135,21 +164,18 @@ def get_all_variables( ------- ds : xarray The data in `xarray` format. - - Raises - ------ - ValueError - When variables are from multiple IDSs. """ extra_variables = extra_variables or [] idsvar_lookup = var_lookup.filter_ids(ids) variables = list(set(list(extra_variables) + list(idsvar_lookup.keys()))) - return self.get_variables(variables, squash, missing_ok=True, **kwargs) + return self.get_variables(variables, ids=ids, squash=squash, missing_ok=True, **kwargs) def get_variables( self, - variables: Sequence[str | IDSVariableModel], + variables: Collection[str | IDSVariableModel], + *, + ids: str, squash: bool = True, **kwargs, ) -> xr.Dataset: @@ -160,8 +186,10 @@ def get_variables( Parameters ---------- - variables : Sequence[Union[str, IDSVariableModel]] + variables : Collection[Union[str, IDSVariableModel]] Variable names of the data to load. + ids : str + The IDS to read from (e.g. 'core_profiles') squash : bool Squash placeholder variables **kwargs @@ -175,16 +203,13 @@ def get_variables( Raises ------ ValueError - When variables are from multiple IDSs. + When variables are from different IDS. 
""" var_models = var_lookup.lookup(variables) - idss = {var.ids for var in var_models} - - if len(idss) > 1: - raise ValueError(f'All variables must belong to the same IDS, got {idss}') - - ids = var_models[0].ids + for var in var_models: + if var.ids != ids: + raise ValueError(f'Variable {var} does not belong to {ids}.') data_file = self.open_ids(ids) @@ -198,7 +223,7 @@ def get_variables( @staticmethod def to_xarray( data_file: h5py.File, - variables: Sequence[str | IDSVariableModel], + variables: Collection[str | IDSVariableModel], missing_ok: bool = False, empty_ok: bool = False, ) -> xr.Dataset: @@ -208,7 +233,7 @@ def to_xarray( ---------- data_file : h5py.File Open hdf5 file - variables : Sequence[str | IDSVariableModel]] + variables : Collection[str | IDSVariableModel]] Dictionary of data variables missing_ok : bool Ignore missing variables from dataset @@ -247,3 +272,19 @@ def to_xarray( ds = xr.Dataset(data_vars=xr_data_vars) # type: ignore return ds + + def set_variables( + self, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None + ): + """Summary. 
+ + Parameters + ---------- + dataset : xr.Dataset + Description + ids : str + Description + variables : Collection[str], optional + Description + """ + pass diff --git a/src/imas2xarray/_lookup.py b/src/imas2xarray/_lookup.py index a4da0e0..87c1003 100644 --- a/src/imas2xarray/_lookup.py +++ b/src/imas2xarray/_lookup.py @@ -6,7 +6,7 @@ import sys from collections import UserDict from pathlib import Path, PosixPath -from typing import Any, Hashable, Sequence +from typing import Any, Collection, Hashable from pydantic_yaml import parse_yaml_raw_as @@ -79,7 +79,7 @@ def groupby_ids(self) -> dict[Hashable, list[IDSVariableModel]]: grouped_ids_vars = groupby(ids_vars, keyfunc=lambda var: var.ids) return grouped_ids_vars - def lookup(self, variables: Sequence[(str | IDSVariableModel)]) -> list[IDSVariableModel]: + def lookup(self, variables: Collection[(str | IDSVariableModel)]) -> list[IDSVariableModel]: """Helper function to look up a bunch of variables. If str, look up the variable from the `var_lookup`. 
Else, check if diff --git a/tests/test_to_imas.py b/tests/test_to_imas.py index ce54bfc..1511e64 100644 --- a/tests/test_to_imas.py +++ b/tests/test_to_imas.py @@ -1,32 +1,33 @@ -from imas2xarray import H5Handle, Variable, to_imas -import pytest +from __future__ import annotations +import os -@pytest.fixture -def dataset(): - ds = 123 - return ds +import pytest +from imas2xarray import H5Handle -def to_imas(file: str | Path, arr: xr.Dataset, ids: str, variables: Collection[str]): +@pytest.fixture +def dataset(): + ds = 123 + return ds +@pytest.mark.xfail def test_to_imas(dataset, tmpdir): - # copy data to tempdir - - h = H5Handle(tmpdir / 'my_data') + # copy data to tempdir - ids = 'core_profiles' - variables = 'zeff', 't_e' + h = H5Handle(tmpdir / 'my_data') - path = (h.path / ids).with_suffix('h5') - assert path.exists() - mtime1 = os.stat(path).st_mtime + ids = 'core_profiles' + variables = 'zeff', 't_e' - h.to_imas(xarray, ids=ids, variables=variables) + path = (h.path / ids).with_suffix('h5') + assert path.exists() + mtime1 = os.stat(path).st_mtime - assert (h.path / ids).with_suffix('h5').exists() - mtime2 = os.stat(path).st_mtime - assert mtime2 != mtime1 + h.to_imas(dataset, ids=ids, variables=variables) + assert (h.path / ids).with_suffix('h5').exists() + mtime2 = os.stat(path).st_mtime + assert mtime2 != mtime1 From 47a64ddcda54d553897de8bfdb402455a495070a Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 7 Dec 2023 17:31:14 +0100 Subject: [PATCH 03/12] Implement basic writer --- scripts/modify_data.py | 23 +++++++++++++++++++++++ src/imas2xarray/__init__.py | 3 ++- src/imas2xarray/_io.py | 32 +++++++++++++++++++++++++++----- 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 scripts/modify_data.py diff --git a/scripts/modify_data.py b/scripts/modify_data.py new file mode 100644 index 0000000..c8c6426 --- /dev/null +++ b/scripts/modify_data.py @@ -0,0 +1,23 @@ +from imas2xarray import to_imas, to_xarray + +x_var = 'rho_tor_norm' 
+y_var = 't_e' +time_var = 'time' + +variables = (x_var, y_var, time_var) + +dataset = to_xarray( + './data', + ids='core_profiles', + variables=variables, +) + +print(dataset['t_e']) +dataset['t_e'] += 1 + +to_imas( + './data', + dataset=dataset, + ids='core_profiles', + variables=variables, +) diff --git a/src/imas2xarray/__init__.py b/src/imas2xarray/__init__.py index 11c4cab..1ca7be2 100644 --- a/src/imas2xarray/__init__.py +++ b/src/imas2xarray/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from ._io import H5Handle, to_xarray +from ._io import H5Handle, to_imas, to_xarray from ._lookup import VariableConfigLoader, var_lookup from ._models import IDSPath, VariableConfigModel from ._models import IDSVariableModel as Variable @@ -30,6 +30,7 @@ 'standardize_grid', 'standardize_grid_and_time', 'to_xarray', + 'to_imas', 'var_lookup', 'VariableConfigLoader', 'VariableConfigModel', diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index 21afe54..802b0f6 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -119,7 +119,7 @@ class H5Handle: def __init__(self, path: Path | str): self.path = Path(path) - def open_ids(self, ids: str = 'core_profiles') -> h5py.File: + def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File: """Map the data to a dict-like structure. 
Parameters @@ -134,7 +134,7 @@ def open_ids(self, ids: str = 'core_profiles') -> h5py.File: data_file = (self.path / ids).with_suffix('.h5') assert data_file.exists() - return h5py.File(data_file, 'r')[ids] + return h5py.File(data_file, mode)[ids] def get_all_variables( self, @@ -211,9 +211,12 @@ def get_variables( if var.ids != ids: raise ValueError(f'Variable {var} does not belong to {ids}.') - data_file = self.open_ids(ids) + # TODO: use with statement + group = self.open_ids(ids, 'r') - ds = self.to_xarray(data_file, variables=var_models, **kwargs) + ds = self.to_xarray(group, variables=var_models, **kwargs) + + group.file.close() if squash: ds = squash_placeholders(ds) @@ -287,4 +290,23 @@ def set_variables( variables : Collection[str], optional Description """ - pass + if not variables: + variables = list(dataset.variables) + # TODO: check variables in var_lookup + + var_models = var_lookup.lookup(variables) + + for var in var_models: + if var.ids != ids: + raise ValueError(f'Variable {var} does not belong to {ids}.') + + group = self.open_ids(ids, 'r+') + + for var in var_models: + arr = dataset[var.name] + + key, slices = _var_path_to_hdf5_key_and_slices(var.path) + + group[key][slices] = arr + + group.file.close() From fe79c316300ce13cca5624245d4b35722b5e21e3 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 7 Dec 2023 17:39:03 +0100 Subject: [PATCH 04/12] Tweak script --- scripts/modify_data.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/modify_data.py b/scripts/modify_data.py index c8c6426..10388a5 100644 --- a/scripts/modify_data.py +++ b/scripts/modify_data.py @@ -1,14 +1,11 @@ from imas2xarray import to_imas, to_xarray -x_var = 'rho_tor_norm' -y_var = 't_e' -time_var = 'time' - -variables = (x_var, y_var, time_var) +variables = ('rho_tor_norm', 'time', 't_e') +ids = 'core_profiles' dataset = to_xarray( './data', - ids='core_profiles', + ids=ids, variables=variables, ) @@ -18,6 +15,6 @@ to_imas( './data', 
dataset=dataset, - ids='core_profiles', + ids=ids, variables=variables, ) From 9db36ea6c3b3bdbea4f350e1429460b44d6c7559 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 09:52:08 +0100 Subject: [PATCH 05/12] Update examples --- docs/{plotting_example.md => examples.md} | 18 +++++++++++++++++- docs/modify_data.md | 9 +++++++++ mkdocs.yml | 3 +-- scripts/modify_data.py | 4 ++-- 4 files changed, 29 insertions(+), 5 deletions(-) rename docs/{plotting_example.md => examples.md} (60%) create mode 100644 docs/modify_data.md diff --git a/docs/plotting_example.md b/docs/examples.md similarity index 60% rename from docs/plotting_example.md rename to docs/examples.md index 53991db..d4b6a00 100644 --- a/docs/plotting_example.md +++ b/docs/examples.md @@ -1,4 +1,18 @@ -## Plotting example +# Examples + +## Modify data + +Below is an example of how to use **imas2xarray** to modify data in-place. + +Note that **imas2xarray** can only update data in-place, i.e. the new data must have the same shape as the existing data. + +```python +{!../scripts/modify_data.py!} +``` + +[Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/modify_data.py) + +## Plotting single dataset Below is an example of how to use **imas2xarray** to plot data with [matplotlib](https://matplotlib.org/). @@ -8,6 +22,8 @@ Below is an example of how to use **imas2xarray** to plot data with [matplotlib] [Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/plot_with_matplotlib.py) +## Plotting multiple datasets + The code below shows how to make a plot with [matplotlib](https://matplotlib.org/) for multiple datasets. For a more advanced example of how to concatenate data, check out the [example notebooks](../notebooks/xarray). diff --git a/docs/modify_data.md b/docs/modify_data.md new file mode 100644 index 0000000..374caf7 --- /dev/null +++ b/docs/modify_data.md @@ -0,0 +1,9 @@ +## Modify data + +Below is an example of how to use **imas2xarray** to data in-place. 
+ +```python +{!../scripts/modify_data.py!} +``` + +[Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/modify_data.py) diff --git a/mkdocs.yml b/mkdocs.yml index 6741fb1..bd25065 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,7 +7,7 @@ nav: - Home: index.md - Variables: variables.md - Python API: api/index.md - - Plotting example: plotting_example.md + - Examples: examples.md - Notebook - Variables: notebooks/xarray.ipynb - Notebook - 2D data: notebooks/xarray-2D.ipynb - Notebook - Ions: notebooks/xarray-ions.ipynb @@ -17,7 +17,6 @@ nav: - 🔗 Issues: https://github.com/duqtools/imas2xarray/issues - 🔗 Duqtools: https://duqtools.readthedocs.io - theme: name: material primary: blue diff --git a/scripts/modify_data.py b/scripts/modify_data.py index 10388a5..50fbfee 100644 --- a/scripts/modify_data.py +++ b/scripts/modify_data.py @@ -4,7 +4,7 @@ ids = 'core_profiles' dataset = to_xarray( - './data', + '/pfs/work/g2aho/public/imasdb/test/3/92436/1/', ids=ids, variables=variables, ) @@ -13,7 +13,7 @@ dataset['t_e'] += 1 to_imas( - './data', + '/pfs/work/g2aho/public/imasdb/test/3/92436/1/', dataset=dataset, ids=ids, variables=variables, From e3da6013ff766498cb0c347f254174ab33e465de Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 12:11:12 +0100 Subject: [PATCH 06/12] Add test data submodule --- .gitmodules | 3 +++ tests/hdf5 | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 tests/hdf5 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..db4b88e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/hdf5"] + path = tests/hdf5 + url = https://github.com/duqtools/hdf5_testdata diff --git a/tests/hdf5 b/tests/hdf5 new file mode 160000 index 0000000..a33bcf2 --- /dev/null +++ b/tests/hdf5 @@ -0,0 +1 @@ +Subproject commit a33bcf2afb8257683942fd39072177aacc9b11c1 From 1549c31ee7a2601751b344bd1268a8fa4a7c5621 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 
2023 12:11:25 +0100 Subject: [PATCH 07/12] Remove redundant file --- docs/modify_data.md | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 docs/modify_data.md diff --git a/docs/modify_data.md b/docs/modify_data.md deleted file mode 100644 index 374caf7..0000000 --- a/docs/modify_data.md +++ /dev/null @@ -1,9 +0,0 @@ -## Modify data - -Below is an example of how to use **imas2xarray** to data in-place. - -```python -{!../scripts/modify_data.py!} -``` - -[Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/modify_data.py) From fe7952cc96861031d7480dfce26b812f5005a809 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 14:35:05 +0100 Subject: [PATCH 08/12] Add test --- src/imas2xarray/_io.py | 2 +- tests/test_to_imas.py | 34 ++++++++++++++++------------------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index 802b0f6..a1d7446 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -272,7 +272,7 @@ def to_xarray( else: xr_data_vars[var.name] = ([*var.dims], arr[slices]) - ds = xr.Dataset(data_vars=xr_data_vars) # type: ignore + ds = xr.Dataset(data_vars=xr_data_vars) return ds diff --git a/tests/test_to_imas.py b/tests/test_to_imas.py index 1511e64..516be93 100644 --- a/tests/test_to_imas.py +++ b/tests/test_to_imas.py @@ -1,33 +1,31 @@ from __future__ import annotations import os +import shutil +from pathlib import Path -import pytest +from imas2xarray import to_imas, to_xarray -from imas2xarray import H5Handle +DATA_DIR = Path(__file__).parent / 'hdf5' / 'data' -@pytest.fixture -def dataset(): - ds = 123 - return ds +def test_to_imas(tmpdir): + filename = 'core_profiles.h5' + filepath = tmpdir / filename + shutil.copy(DATA_DIR / filename, filepath) -@pytest.mark.xfail -def test_to_imas(dataset, tmpdir): - # copy data to tempdir + ids = 'core_profiles' + variables = ('rho_tor_norm', 'time', 't_e') - h = H5Handle(tmpdir / 'my_data') + dataset = 
to_xarray(tmpdir, variables=variables, ids=ids) - ids = 'core_profiles' - variables = 'zeff', 't_e' + assert filepath.exists() + mtime1 = os.stat(filepath).st_mtime - path = (h.path / ids).with_suffix('h5') - assert path.exists() - mtime1 = os.stat(path).st_mtime + to_imas(tmpdir, dataset=dataset, ids=ids, variables=variables) - h.to_imas(dataset, ids=ids, variables=variables) + assert filepath.exists() + mtime2 = os.stat(filepath).st_mtime - assert (h.path / ids).with_suffix('h5').exists() - mtime2 = os.stat(path).st_mtime assert mtime2 != mtime1 From 86a20a8a1389dff61778896408c096133cd37054 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 15:06:52 +0100 Subject: [PATCH 09/12] Use contextmanager to open ids --- src/imas2xarray/_io.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index a1d7446..34db2b6 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +from contextlib import contextmanager from pathlib import Path from typing import TYPE_CHECKING, Collection @@ -119,8 +120,9 @@ class H5Handle: def __init__(self, path: Path | str): self.path = Path(path) + @contextmanager def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File: - """Map the data to a dict-like structure. + """Context manager to open the IDS file. 
Parameters ---------- @@ -134,7 +136,8 @@ def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File: data_file = (self.path / ids).with_suffix('.h5') assert data_file.exists() - return h5py.File(data_file, mode)[ids] + with h5py.File(data_file, mode) as f: + yield f[ids] def get_all_variables( self, @@ -211,12 +214,8 @@ def get_variables( if var.ids != ids: raise ValueError(f'Variable {var} does not belong to {ids}.') - # TODO: use with statement - group = self.open_ids(ids, 'r') - - ds = self.to_xarray(group, variables=var_models, **kwargs) - - group.file.close() + with self.open_ids(ids, 'r') as group: + ds = self.to_xarray(group, variables=var_models, **kwargs) if squash: ds = squash_placeholders(ds) @@ -237,7 +236,7 @@ def to_xarray( data_file : h5py.File Open hdf5 file variables : Collection[str | IDSVariableModel]] - Dictionary of data variables + List of data variables missing_ok : bool Ignore missing variables from dataset empty_ok : bool @@ -279,16 +278,17 @@ def to_xarray( def set_variables( self, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None ): - """Summary. + """Update variables in corresponding ids datafile. Parameters ---------- dataset : xr.Dataset - Description + Dataset with variables to write. Their dimensions must match those of the + target dataset. ids : str - Description + IDS to write to. variables : Collection[str], optional - Description + List of data variables to write. 
""" if not variables: variables = list(dataset.variables) @@ -300,13 +300,10 @@ def set_variables( if var.ids != ids: raise ValueError(f'Variable {var} does not belong to {ids}.') - group = self.open_ids(ids, 'r+') - - for var in var_models: - arr = dataset[var.name] - - key, slices = _var_path_to_hdf5_key_and_slices(var.path) + with self.open_ids(ids, 'r+') as group: + for var in var_models: + arr = dataset[var.name] - group[key][slices] = arr + key, slices = _var_path_to_hdf5_key_and_slices(var.path) - group.file.close() + group[key][slices] = arr From 18dbd43e2f65564a9e75eed52286ea957ecb02bd Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 15:18:15 +0100 Subject: [PATCH 10/12] Load submodule --- .github/workflows/test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2d1a76e..8336446 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,6 +27,8 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: 'true' - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 From dfd12c6a091cbc43a86cad7188dd3593cdaa3bab Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 15:23:28 +0100 Subject: [PATCH 11/12] Move .to_xarray staticmethod --- src/imas2xarray/_io.py | 108 ++++++++++++++++++++-------------------- tests/test_rebase.py | 4 +- tests/test_to_xarray.py | 22 ++++---- 3 files changed, 67 insertions(+), 67 deletions(-) diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index 34db2b6..bd1f01c 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -62,6 +62,59 @@ def _var_path_to_hdf5_key_and_slices(path: str) -> tuple[str, tuple[slice | int, return key, tuple(slices) +def _mapping_to_xarray( + data_file: h5py.File, + variables: Collection[str | IDSVariableModel], + missing_ok: bool = False, + empty_ok: bool = False, +) -> xr.Dataset: + """Return dataset for given variables. 
+ + Parameters + ---------- + data_file : h5py.File + Open hdf5 file + variables : Collection[str | IDSVariableModel]] + List of data variables + missing_ok : bool + Ignore missing variables from dataset + empty_ok : bool + Add empty fields to output + + Returns + ------- + ds : xr.Dataset + Return query as Dataset + """ + xr_data_vars: dict[str, tuple[list[str], np.ndarray]] = {} + + variables = var_lookup.lookup(variables) + + for var in variables: + key, slices = _var_path_to_hdf5_key_and_slices(var.path) + + if key not in data_file: + if missing_ok: + continue + raise MissingVarError( + f'{var.path} does not exist in data file (HDF5 key: {key!r}) .' + ) + + arr = data_file[key] + + if (not empty_ok) and (arr.size == 0): + raise EmptyVarError(f'Variable {var.name!r} contains empty data.') + + if len(slices) == 0: + xr_data_vars[var.name] = (var.dims, arr) + else: + xr_data_vars[var.name] = ([*var.dims], arr[slices]) + + ds = xr.Dataset(data_vars=xr_data_vars) + + return ds + + def to_xarray( path: str | Path, *, ids: str, variables: None | Collection[str] = None ) -> xr.Dataset: @@ -215,66 +268,13 @@ def get_variables( raise ValueError(f'Variable {var} does not belong to {ids}.') with self.open_ids(ids, 'r') as group: - ds = self.to_xarray(group, variables=var_models, **kwargs) + ds = _mapping_to_xarray(group, variables=var_models, **kwargs) if squash: ds = squash_placeholders(ds) return ds - @staticmethod - def to_xarray( - data_file: h5py.File, - variables: Collection[str | IDSVariableModel], - missing_ok: bool = False, - empty_ok: bool = False, - ) -> xr.Dataset: - """Return dataset for given variables. 
- - Parameters - ---------- - data_file : h5py.File - Open hdf5 file - variables : Collection[str | IDSVariableModel]] - List of data variables - missing_ok : bool - Ignore missing variables from dataset - empty_ok : bool - Add empty fields to output - - Returns - ------- - ds : xr.Dataset - Return query as Dataset - """ - xr_data_vars: dict[str, tuple[list[str], np.ndarray]] = {} - - variables = var_lookup.lookup(variables) - - for var in variables: - key, slices = _var_path_to_hdf5_key_and_slices(var.path) - - if key not in data_file: - if missing_ok: - continue - raise MissingVarError( - f'{var.path} does not exist in data file (HDF5 key: {key!r}) .' - ) - - arr = data_file[key] - - if (not empty_ok) and (arr.size == 0): - raise EmptyVarError(f'Variable {var.name!r} contains empty data.') - - if len(slices) == 0: - xr_data_vars[var.name] = (var.dims, arr) - else: - xr_data_vars[var.name] = ([*var.dims], arr[slices]) - - ds = xr.Dataset(data_vars=xr_data_vars) - - return ds - def set_variables( self, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None ): diff --git a/tests/test_rebase.py b/tests/test_rebase.py index eedb5bf..d7ac28c 100644 --- a/tests/test_rebase.py +++ b/tests/test_rebase.py @@ -6,13 +6,13 @@ from idsmapping_sample_data import sample_data from imas2xarray import ( - H5Handle, Variable, rebase_on_grid, rebase_on_time, rezero_time, standardize_grid, ) +from imas2xarray._io import _mapping_to_xarray TIME_VAR = Variable( name='time', @@ -44,7 +44,7 @@ def variables1d(): @pytest.fixture def sample_dataset(variables1d): - ds = H5Handle.to_xarray(sample_data, variables=variables1d) + ds = _mapping_to_xarray(sample_data, variables=variables1d) ds_grid = standardize_grid(ds, old_dim='x', new_dim='xvar', group='time') return ds_grid diff --git a/tests/test_to_xarray.py b/tests/test_to_xarray.py index db6bfc8..9a9f807 100644 --- a/tests/test_to_xarray.py +++ b/tests/test_to_xarray.py @@ -4,8 +4,8 @@ import xarray as xr from 
idsmapping_sample_data import sample_data -from imas2xarray import H5Handle, Variable -from imas2xarray._io import EmptyVarError, MissingVarError +from imas2xarray import Variable +from imas2xarray._io import EmptyVarError, MissingVarError, _mapping_to_xarray TIME_VAR = Variable( name='time', @@ -233,7 +233,7 @@ def test_no_time_index(expected_dataset_no_index): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_no_index) @@ -246,7 +246,7 @@ def test_0d(expected_dataset_0d): dims=['x'], ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_0d) @@ -268,7 +268,7 @@ def test_1d(expected_dataset_1d): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_1d) @@ -289,7 +289,7 @@ def test_2d(expected_dataset_2d): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_2d) @@ -310,7 +310,7 @@ def test_2d_ion(expected_dataset_2d_ion): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_2d_ion) @@ -320,9 +320,9 @@ def test_empty_var_ok(): ) with pytest.raises(EmptyVarError): - H5Handle.to_xarray(sample_data, variables=(EmptyVar,), empty_ok=False) + _mapping_to_xarray(sample_data, variables=(EmptyVar,), empty_ok=False) - ds = H5Handle.to_xarray(sample_data, variables=(EmptyVar,), empty_ok=True) + ds = _mapping_to_xarray(sample_data, variables=(EmptyVar,), empty_ok=True) assert ds['empty'].size == 0 @@ -336,5 +336,5 @@ def test_raise_on_non_existant(): ) with pytest.raises(MissingVarError): - 
H5Handle.to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=True) - H5Handle.to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=False) + _mapping_to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=True) + _mapping_to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=False) From cad5478993cb0a7953cf55e39025816ec97fbd3d Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 11 Dec 2023 15:27:06 +0100 Subject: [PATCH 12/12] Run coverage only on main branch --- .github/workflows/test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8336446..ea3a4cf 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -88,6 +88,7 @@ jobs: coverage: name: Coverage needs: test + if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest steps: - name: Check out the repo