diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2d1a76e..ea3a4cf 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,6 +27,8 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: 'true' - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -86,6 +88,7 @@ jobs: coverage: name: Coverage needs: test + if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest steps: - name: Check out the repo diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..db4b88e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/hdf5"] + path = tests/hdf5 + url = https://github.com/duqtools/hdf5_testdata diff --git a/docs/plotting_example.md b/docs/examples.md similarity index 60% rename from docs/plotting_example.md rename to docs/examples.md index 53991db..d4b6a00 100644 --- a/docs/plotting_example.md +++ b/docs/examples.md @@ -1,4 +1,18 @@ -## Plotting example +# Examples + +## Modify data + +Below is an example of how to use **imas2xarray** to modify data in-place. + +Note that **imas2xarray** can only update data in-place, i.e. the new data must have the same shape as the existing data. + +```python +{!../scripts/modify_data.py!} +``` + +[Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/modify_data.py) + +## Plotting single dataset Below is an example of how to use **imas2xarray** to plot data with [matplotlib](https://matplotlib.org/). @@ -8,6 +22,8 @@ Below is an example of how to use **imas2xarray** to plot data with [matplotlib] [Source code](https://github.com/duqtools/imas2xarray/tree/main/scripts/plot_with_matplotlib.py) +## Plotting multiple datasets + The code below shows how to make a plot with [matplotlib](https://matplotlib.org/) for multiple datasets. For a more advanced example of how to concatenate data, check out the [example notebooks](../notebooks/xarray). 
diff --git a/mkdocs.yml b/mkdocs.yml index 6741fb1..bd25065 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,7 +7,7 @@ nav: - Home: index.md - Variables: variables.md - Python API: api/index.md - - Plotting example: plotting_example.md + - Examples: examples.md - Notebook - Variables: notebooks/xarray.ipynb - Notebook - 2D data: notebooks/xarray-2D.ipynb - Notebook - Ions: notebooks/xarray-ions.ipynb @@ -17,7 +17,6 @@ nav: - 🔗 Issues: https://github.com/duqtools/imas2xarray/issues - 🔗 Duqtools: https://duqtools.readthedocs.io - theme: name: material primary: blue diff --git a/scripts/modify_data.py b/scripts/modify_data.py new file mode 100644 index 0000000..50fbfee --- /dev/null +++ b/scripts/modify_data.py @@ -0,0 +1,20 @@ +from imas2xarray import to_imas, to_xarray + +variables = ('rho_tor_norm', 'time', 't_e') +ids = 'core_profiles' + +dataset = to_xarray( + '/pfs/work/g2aho/public/imasdb/test/3/92436/1/', + ids=ids, + variables=variables, +) + +print(dataset['t_e']) +dataset['t_e'] += 1 + +to_imas( + '/pfs/work/g2aho/public/imasdb/test/3/92436/1/', + dataset=dataset, + ids=ids, + variables=variables, +) diff --git a/src/imas2xarray/__init__.py b/src/imas2xarray/__init__.py index 11c4cab..1ca7be2 100644 --- a/src/imas2xarray/__init__.py +++ b/src/imas2xarray/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from ._io import H5Handle, to_xarray +from ._io import H5Handle, to_imas, to_xarray from ._lookup import VariableConfigLoader, var_lookup from ._models import IDSPath, VariableConfigModel from ._models import IDSVariableModel as Variable @@ -30,6 +30,7 @@ 'standardize_grid', 'standardize_grid_and_time', 'to_xarray', + 'to_imas', 'var_lookup', 'VariableConfigLoader', 'VariableConfigModel', diff --git a/src/imas2xarray/_io.py b/src/imas2xarray/_io.py index 0f68b5a..bd1f01c 100644 --- a/src/imas2xarray/_io.py +++ b/src/imas2xarray/_io.py @@ -4,8 +4,9 @@ """ from __future__ import annotations +from contextlib import contextmanager from pathlib import 
Path -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Collection import h5py import numpy as np @@ -61,7 +62,62 @@ def _var_path_to_hdf5_key_and_slices(path: str) -> tuple[str, tuple[slice | int, return key, tuple(slices) -def to_xarray(path: str | Path, *, ids: str, variables: None | Sequence[str] = None): +def _mapping_to_xarray( + data_file: h5py.File, + variables: Collection[str | IDSVariableModel], + missing_ok: bool = False, + empty_ok: bool = False, +) -> xr.Dataset: + """Return dataset for given variables. + + Parameters + ---------- + data_file : h5py.File + Open hdf5 file + variables : Collection[str | IDSVariableModel]] + List of data variables + missing_ok : bool + Ignore missing variables from dataset + empty_ok : bool + Add empty fields to output + + Returns + ------- + ds : xr.Dataset + Return query as Dataset + """ + xr_data_vars: dict[str, tuple[list[str], np.ndarray]] = {} + + variables = var_lookup.lookup(variables) + + for var in variables: + key, slices = _var_path_to_hdf5_key_and_slices(var.path) + + if key not in data_file: + if missing_ok: + continue + raise MissingVarError( + f'{var.path} does not exist in data file (HDF5 key: {key!r}) .' + ) + + arr = data_file[key] + + if (not empty_ok) and (arr.size == 0): + raise EmptyVarError(f'Variable {var.name!r} contains empty data.') + + if len(slices) == 0: + xr_data_vars[var.name] = (var.dims, arr) + else: + xr_data_vars[var.name] = ([*var.dims], arr[slices]) + + ds = xr.Dataset(data_vars=xr_data_vars) + + return ds + + +def to_xarray( + path: str | Path, *, ids: str, variables: None | Collection[str] = None +) -> xr.Dataset: """Load IDS from given path to IMAS data into an xarray dataset. IMAS data must be in HDF5 format. 
@@ -78,23 +134,48 @@ def to_xarray(path: str | Path, *, ids: str, variables: None | Sequence[str] = N Returns ------- - ds : xr.Dataset + dataset : xr.Dataset Xarray dataset with all specified variables """ h = H5Handle(path) if variables: - return h.get_variables(variables=variables) + return h.get_variables(variables=variables, ids=ids) else: - return h.get_all_variables() + return h.get_all_variables(ids=ids) + + +def to_imas( + path: str | Path, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None +): + """Write variables in xarray dataset back to IMAS data at given path. + + Update only, IMAS data must be in HDF5 format. + + Parameters + ---------- + path : str | Path + Path to the data + dataset : xr.Dataset + Input dataset + ids : str + The IDS to write to (i.e. 'core_profiles') + variables : Collection[str] + List of variables to write back. If None, attempt to write back + all variables known to `imas2xarray` + """ + h = H5Handle(path) + + h.set_variables(dataset, ids=ids, variables=variables) class H5Handle: def __init__(self, path: Path | str): self.path = Path(path) - def open_ids(self, ids: str = 'core_profiles') -> h5py.File: - """Map the data to a dict-like structure. + @contextmanager + def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File: + """Context manager to open the IDS file. Parameters ---------- @@ -108,13 +189,15 @@ def open_ids(self, ids: str = 'core_profiles') -> h5py.File: data_file = (self.path / ids).with_suffix('.h5') assert data_file.exists() - return h5py.File(data_file, 'r')[ids] + with h5py.File(data_file, mode) as f: + yield f[ids] def get_all_variables( self, - extra_variables: None | Sequence[IDSVariableModel] = None, + *, + ids: str, + extra_variables: None | Collection[IDSVariableModel] = None, squash: bool = True, - ids: str = 'core_profiles', **kwargs, ) -> xr.Dataset: """Get all known variables from selected ids from the dataset. 
@@ -124,7 +207,9 @@ def get_all_variables( Parameters ---------- - extra_variables : Sequence[IDSVariableModel] + ids : str + The IDS to write to (i.e. 'core_profiles') + extra_variables : Collection[IDSVariableModel] Extra variables to load in addition to the ones known through the config squash : bool Squash placeholder variables @@ -135,21 +220,18 @@ def get_all_variables( ------- ds : xarray The data in `xarray` format. - - Raises - ------ - ValueError - When variables are from multiple IDSs. """ extra_variables = extra_variables or [] idsvar_lookup = var_lookup.filter_ids(ids) variables = list(set(list(extra_variables) + list(idsvar_lookup.keys()))) - return self.get_variables(variables, squash, missing_ok=True, **kwargs) + return self.get_variables(variables, ids=ids, squash=squash, missing_ok=True, **kwargs) def get_variables( self, - variables: Sequence[str | IDSVariableModel], + variables: Collection[str | IDSVariableModel], + *, + ids: str, squash: bool = True, **kwargs, ) -> xr.Dataset: @@ -160,8 +242,10 @@ def get_variables( Parameters ---------- - variables : Sequence[Union[str, IDSVariableModel]] + variables : Collection[Union[str, IDSVariableModel]] Variable names of the data to load. + ids : str + The IDS to write to (i.e. 'core_profiles') squash : bool Squash placeholder variables **kwargs @@ -175,75 +259,51 @@ def get_variables( Raises ------ ValueError - When variables are from multiple IDSs. + When variables are from different IDS. 
""" var_models = var_lookup.lookup(variables) - idss = {var.ids for var in var_models} + for var in var_models: + if var.ids != ids: + raise ValueError(f'Variable {var} does not belong to {ids}.') - if len(idss) > 1: - raise ValueError(f'All variables must belong to the same IDS, got {idss}') - - ids = var_models[0].ids - - data_file = self.open_ids(ids) - - ds = self.to_xarray(data_file, variables=var_models, **kwargs) + with self.open_ids(ids, 'r') as group: + ds = _mapping_to_xarray(group, variables=var_models, **kwargs) if squash: ds = squash_placeholders(ds) return ds - @staticmethod - def to_xarray( - data_file: h5py.File, - variables: Sequence[str | IDSVariableModel], - missing_ok: bool = False, - empty_ok: bool = False, - ) -> xr.Dataset: - """Return dataset for given variables. + def set_variables( + self, dataset: xr.Dataset, *, ids: str, variables: None | Collection[str] = None + ): + """Update variables in corresponding ids datafile. Parameters ---------- - data_file : h5py.File - Open hdf5 file - variables : Sequence[str | IDSVariableModel]] - Dictionary of data variables - missing_ok : bool - Ignore missing variables from dataset - empty_ok : bool - Add empty fields to output - - Returns - ------- - ds : xr.Dataset - Return query as Dataset + dataset : xr.Dataset + Dataset with variables to write. Their dimensions must match those of the + target dataset. + ids : str + IDS to write to. + variables : Collection[str], optional + List of data variables to write. """ - xr_data_vars: dict[str, tuple[list[str], np.ndarray]] = {} - - variables = var_lookup.lookup(variables) - - for var in variables: - key, slices = _var_path_to_hdf5_key_and_slices(var.path) + if not variables: + variables = list(dataset.variables) + # TODO: check variables in var_lookup - if key not in data_file: - if missing_ok: - continue - raise MissingVarError( - f'{var.path} does not exist in data file (HDF5 key: {key!r}) .' 
- ) - - arr = data_file[key] + var_models = var_lookup.lookup(variables) - if (not empty_ok) and (arr.size == 0): - raise EmptyVarError(f'Variable {var.name!r} contains empty data.') + for var in var_models: + if var.ids != ids: + raise ValueError(f'Variable {var} does not belong to {ids}.') - if len(slices) == 0: - xr_data_vars[var.name] = (var.dims, arr) - else: - xr_data_vars[var.name] = ([*var.dims], arr[slices]) + with self.open_ids(ids, 'r+') as group: + for var in var_models: + arr = dataset[var.name] - ds = xr.Dataset(data_vars=xr_data_vars) # type: ignore + key, slices = _var_path_to_hdf5_key_and_slices(var.path) - return ds + group[key][slices] = arr diff --git a/src/imas2xarray/_lookup.py b/src/imas2xarray/_lookup.py index a4da0e0..87c1003 100644 --- a/src/imas2xarray/_lookup.py +++ b/src/imas2xarray/_lookup.py @@ -6,7 +6,7 @@ import sys from collections import UserDict from pathlib import Path, PosixPath -from typing import Any, Hashable, Sequence +from typing import Any, Collection, Hashable from pydantic_yaml import parse_yaml_raw_as @@ -79,7 +79,7 @@ def groupby_ids(self) -> dict[Hashable, list[IDSVariableModel]]: grouped_ids_vars = groupby(ids_vars, keyfunc=lambda var: var.ids) return grouped_ids_vars - def lookup(self, variables: Sequence[(str | IDSVariableModel)]) -> list[IDSVariableModel]: + def lookup(self, variables: Collection[(str | IDSVariableModel)]) -> list[IDSVariableModel]: """Helper function to look up a bunch of variables. If str, look up the variable from the `var_lookup`. 
Else, check if diff --git a/tests/hdf5 b/tests/hdf5 new file mode 160000 index 0000000..a33bcf2 --- /dev/null +++ b/tests/hdf5 @@ -0,0 +1 @@ +Subproject commit a33bcf2afb8257683942fd39072177aacc9b11c1 diff --git a/tests/test_rebase.py b/tests/test_rebase.py index eedb5bf..d7ac28c 100644 --- a/tests/test_rebase.py +++ b/tests/test_rebase.py @@ -6,13 +6,13 @@ from idsmapping_sample_data import sample_data from imas2xarray import ( - H5Handle, Variable, rebase_on_grid, rebase_on_time, rezero_time, standardize_grid, ) +from imas2xarray._io import _mapping_to_xarray TIME_VAR = Variable( name='time', @@ -44,7 +44,7 @@ def variables1d(): @pytest.fixture def sample_dataset(variables1d): - ds = H5Handle.to_xarray(sample_data, variables=variables1d) + ds = _mapping_to_xarray(sample_data, variables=variables1d) ds_grid = standardize_grid(ds, old_dim='x', new_dim='xvar', group='time') return ds_grid diff --git a/tests/test_to_imas.py b/tests/test_to_imas.py new file mode 100644 index 0000000..516be93 --- /dev/null +++ b/tests/test_to_imas.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import os +import shutil +from pathlib import Path + +from imas2xarray import to_imas, to_xarray + +DATA_DIR = Path(__file__).parent / 'hdf5' / 'data' + + +def test_to_imas(tmpdir): + filename = 'core_profiles.h5' + filepath = tmpdir / filename + + shutil.copy(DATA_DIR / filename, filepath) + + ids = 'core_profiles' + variables = ('rho_tor_norm', 'time', 't_e') + + dataset = to_xarray(tmpdir, variables=variables, ids=ids) + + assert filepath.exists() + mtime1 = os.stat(filepath).st_mtime + + to_imas(tmpdir, dataset=dataset, ids=ids, variables=variables) + + assert filepath.exists() + mtime2 = os.stat(filepath).st_mtime + + assert mtime2 != mtime1 diff --git a/tests/test_xarray_interface.py b/tests/test_to_xarray.py similarity index 93% rename from tests/test_xarray_interface.py rename to tests/test_to_xarray.py index db6bfc8..9a9f807 100644 --- a/tests/test_xarray_interface.py +++ 
b/tests/test_to_xarray.py @@ -4,8 +4,8 @@ import xarray as xr from idsmapping_sample_data import sample_data -from imas2xarray import H5Handle, Variable -from imas2xarray._io import EmptyVarError, MissingVarError +from imas2xarray import Variable +from imas2xarray._io import EmptyVarError, MissingVarError, _mapping_to_xarray TIME_VAR = Variable( name='time', @@ -233,7 +233,7 @@ def test_no_time_index(expected_dataset_no_index): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_no_index) @@ -246,7 +246,7 @@ def test_0d(expected_dataset_0d): dims=['x'], ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_0d) @@ -268,7 +268,7 @@ def test_1d(expected_dataset_1d): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_1d) @@ -289,7 +289,7 @@ def test_2d(expected_dataset_2d): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_2d) @@ -310,7 +310,7 @@ def test_2d_ion(expected_dataset_2d_ion): ), ] - ds = H5Handle.to_xarray(sample_data, variables=variables) + ds = _mapping_to_xarray(sample_data, variables=variables) xr.testing.assert_equal(ds, expected_dataset_2d_ion) @@ -320,9 +320,9 @@ def test_empty_var_ok(): ) with pytest.raises(EmptyVarError): - H5Handle.to_xarray(sample_data, variables=(EmptyVar,), empty_ok=False) + _mapping_to_xarray(sample_data, variables=(EmptyVar,), empty_ok=False) - ds = H5Handle.to_xarray(sample_data, variables=(EmptyVar,), empty_ok=True) + ds = _mapping_to_xarray(sample_data, variables=(EmptyVar,), empty_ok=True) assert ds['empty'].size == 0 @@ -336,5 +336,5 @@ def 
test_raise_on_non_existant(): ) with pytest.raises(MissingVarError): - H5Handle.to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=True) - H5Handle.to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=False) + _mapping_to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=True) + _mapping_to_xarray(sample_data, variables=(NonExistantVar,), missing_ok=False)