From 168a803f58e6cd52e1e7057eb27efd54dd77b095 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 23 Dec 2021 18:18:57 +0000 Subject: [PATCH 01/12] Methods for increasing radial resolution of a DataArray or Dataset --- xbout/boutdataarray.py | 173 +++++++++++++++++++++++++++++++++++++- xbout/boutdataset.py | 87 +++++++++++++++++++ xbout/geometries.py | 17 +--- xbout/tests/test_utils.py | 4 +- xbout/utils.py | 60 ++++++++++++- 5 files changed, 321 insertions(+), 20 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 09b34694..914b6b9d 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -15,7 +15,12 @@ from .plotting import plotfuncs from .plotting.utils import _create_norm from .region import _from_region -from .utils import _update_metadata_increased_resolution, _get_bounding_surfaces +from .utils import ( + _make_1d_xcoord, + _update_metadata_increased_x_resolution, + _update_metadata_increased_y_resolution, + _get_bounding_surfaces, +) @register_dataarray_accessor("bout") @@ -354,7 +359,7 @@ def interpolate_parallel( kwargs={"fill_value": "extrapolate"}, ) - da = _update_metadata_increased_resolution(da, n) + da = _update_metadata_increased_y_resolution(da, n) # Modify dy to be consistent with the higher resolution grid dy_array = xr.DataArray( @@ -381,6 +386,170 @@ def interpolate_parallel( return da + def interpolate_radial( + self, + region=None, + *, + psi=None, + dx=None, + n=None, + method="cubic", + return_dataset=False, + ): + """ + Interpolate in the parallel direction to get a higher resolution version of the + variable. + + Parameters + ---------- + region : str, optional + By default, return a result with all regions interpolated separately and then + combined. If an explicit region argument is passed, then return the variable + from only that region. + psi : 1d or 2d array, optional + Values of `psixy` to interpolate data to. If not given use `n` instead. If + `psi` is given, it must be a 1d array with psi values for the region if + `region` is passed and otherwise must be a 2d {x,y} array. + dx : 1d array, optional + New values of `dx`, corresponding to the values of `psi`. Required if `psi` + is passed. + n : int, optional + The factor to increase the resolution by. Defaults to the value set by + BoutDataset.setupParallelInterp(), or 10 if that has not been called. + method : str, optional + The interpolation method to use. Options from xarray.DataArray.interp(), + currently: linear, nearest, zero, slinear, quadratic, cubic. Default is + 'cubic'. + return_dataset : bool, optional + If this is set to True, return a Dataset containing this variable as a member + (by default returns a DataArray). Only used when region=None. + + Returns + ------- + A new DataArray containing a high-resolution version of the variable. (If + return_dataset=True, instead returns a Dataset containing the DataArray.) 
+ """ + + if psi is not None and n is not None: + raise ValueError(f"Cannot pass both psi and n, got psi={psi}, n={n}") + + tcoord = self.data.metadata["bout_tdim"] + xcoord = self.data.metadata["bout_xdim"] + ycoord = self.data.metadata["bout_ydim"] + zcoord = self.data.metadata["bout_zdim"] + + if region is None: + # Call the single-region version of this method for each region, and combine + # the results together + if psi is None: + psi_parts = [None for _ in self._regions] + else: + psi_parts = [ + psi.bout.from_region(region, with_guards={xcoord: 2, ycoord: 0}) + .isel({ycoord: 0}, drop=True) + .data + for region in self._regions + ] + parts = [ + self.interpolate_radial( + region, psi=this_psi, n=n, method=method + ).bout.to_dataset() + for (region, this_psi) in zip(self._regions, psi_parts) + ] + + # 'region' is not the same for all parts, and should not exist in the result, + # so delete before merging + for part in parts: + if "region" in part.attrs: + del part.attrs["region"] + if "region" in part[self.data.name].attrs: + del part[self.data.name].attrs["region"] + + result = xr.combine_by_coords(parts, combine_attrs="drop_conflicts") + + _make_1d_xcoord(result) + + if return_dataset: + return result + else: + # Extract the DataArray to return + # Cannot call apply_geometry here, because we have not set ixseps1, + # ixseps2, which are needed to create the 'regions'. + return result[self.data.name] + + # Select a particular 'region' and interpolate to higher parallel resolution + da = self.data + + da = da.bout.from_region(region.name, with_guards={xcoord: 2, ycoord: 0}) + da = da.chunk({xcoord: None}) + + old_psi = da["psi_poloidal"].isel({ycoord: 0}, drop=True).values + + if psi is not None: + if dx is None: + raise ValueError("It is required to pass dx if psi is passed") + else: + # Do a rough approximation to the boundary values - expect accurate + # interpolations to be done by passing psi from a new grid file + if n is None: + n = self.fine_interpolation_factor + mxg = da.metadata["MXG"] + if da.metadata["keep_xboundaries"] and region.connection_inner_x is None: + xbndry_lower = mxg + else: + xbndry_lower = 0 + if da.metadata["keep_xboundaries"] and region.connection_outer_x is None: + xbndry_upper = mxg + else: + xbndry_upper = 0 + + nx_fine = n * region.nx + dx = (region.xouter - region.xinner) / nx_fine + + psi = np.linspace( + region.xinner - (xbndry_lower - 0.5) * dx, + region.xouter + (xbndry_upper - 0.5) * dx, + nx_fine + xbndry_lower + xbndry_upper, + ) + + # Modify dx to be consistent with the higher resolution grid + dx_array = xr.full_like(da["dx"], dx) + + # Use psi as a 1d x-coordinate for this interpolation. psixy depends only on x + # in each region (although it may be a different function of x in different + # regions). + del da[xcoord] + da[xcoord] = old_psi + + # This prevents da.interp() from being very slow. + # Apparently large attrs (i.e. regions) on a coordinate which is passed as an + # argument to dask.array.map_blocks() slow things down, maybe because coordinates + # are numpy arrays, not dask arrays? 
+        # Slow-down was introduced in d062fa9e75c02fbfdd46e5d1104b9b12f034448f when
+        # _add_attrs_to_var(updated_ds, ycoord) was added in geometries.py
+        da[xcoord].attrs = {}
+
+        da = da.interp(
+            {xcoord: psi},
+            assume_sorted=True,
+            method=method,
+            kwargs={"fill_value": "extrapolate"},
+        )
+
+        da = _update_metadata_increased_x_resolution(da)
+
+        da["dx"][:] = dx.broadcast_like(da["dx"]).data
+
+        # Remove regions which have incorrect information for the high-resolution grid.
+        # New regions will be generated when creating a new Dataset in
+        # BoutDataset.getHighParallelResVars
+        del da.attrs["regions"]
+
+        # Remove x-coordinate, will recreate x-coordinate for combined DataArray
+        del da[xcoord]
+
+        return da
+
     def remove_yboundaries(self, return_dataset=False, remove_extra_upper=False):
         """
         Remove y-boundary points, if present, from the DataArray
diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py
index e2d2f0c1..e8b2868d 100644
--- a/xbout/boutdataset.py
+++ b/xbout/boutdataset.py
@@ -262,6 +262,93 @@ def find_with_dims(first_var, dims):
 
         return ds
 
+    def interpolate_radial(self, variables, **kwargs):
+        """
+        Interpolate in the radial direction to get higher resolution versions of the
+        variables.
+
+        Note that the high-resolution variables are all loaded into memory, so most
+        likely it is necessary to select only a small number. The `psi` or `n`
+        arguments control the resolution of the new radial grid.
+
+        Parameters
+        ----------
+        variables : str or sequence of str or ...
+            The names of the variables to interpolate. If 'variables=...' is passed
+            explicitly, then interpolate all variables in the Dataset.
+        psi : 1d or 2d array, optional
+            Values of `psixy` to interpolate data to. If not given use `n` instead. If
+            `psi` is given, it must be a 1d array with psi values for the region if
+            `region` is passed and otherwise must be a 2d {x,y} array.
+        dx : 1d array, optional
+            New values of `dx`, corresponding to the values of `psi`. Required if `psi`
+            is passed.
+        n : int, optional
+            The factor to increase the resolution by. Defaults to the value set by
+            BoutDataset.setupParallelInterp(), or 10 if that has not been called.
+        method : str, optional
+            The interpolation method to use. Options from xarray.DataArray.interp(),
+            currently: linear, nearest, zero, slinear, quadratic, cubic. Default is
+            'cubic'.
+
+        Returns
+        -------
+        A new Dataset containing high-resolution versions of the variables. The new
+        Dataset is a valid BoutDataset, although containing only the specified variables.
+        """
+
+        if variables is ...:
+            variables = [v for v in self.data]
+
+        if isinstance(variables, str):
+            variables = [variables]
+        if isinstance(variables, tuple):
+            variables = list(variables)
+
+        # Need to start with a Dataset with attrs as merge() drops the attrs of the
+        # passed-in argument.
+ # Make sure the first variable has all dimensions so we don't lose any + # coordinates + def find_with_dims(first_var, dims): + if first_var is None: + dims = set(dims) + for v in variables: + if set(self.data[v].dims) == dims: + first_var = v + break + return first_var + + tcoord = self.data.metadata.get("bout_tdim", "t") + zcoord = self.data.metadata.get("bout_zdim", "z") + first_var = find_with_dims(None, self.data.dims) + first_var = find_with_dims(first_var, set(self.data.dims) - set(tcoord)) + first_var = find_with_dims(first_var, set(self.data.dims) - set(zcoord)) + first_var = find_with_dims( + first_var, set(self.data.dims) - set([tcoord, zcoord]) + ) + if first_var is None: + raise ValueError( + f"Could not find variable to interpolate with both " + f"{ds.metadata.get('bout_xdim', 'x')} and " + f"{ds.metadata.get('bout_ydim', 'y')} dimensions" + ) + variables.remove(first_var) + ds = self.data[first_var].bout.interpolate_radial(return_dataset=True, **kwargs) + xcoord = ds.metadata.get("bout_xdim", "x") + ycoord = ds.metadata.get("bout_ydim", "y") + for var in variables: + da = self.data[var] + if xcoord in da.dims and ycoord in da.dims: + ds = ds.merge(da.bout.interpolate_radial(return_dataset=True, **kwargs)) + elif xcoord not in da.dims: + ds[var] = da + # Can't interpolate a variable that depends on x but not y, so just skip + + # Apply geometry + ds = apply_geometry(ds, ds.geometry) + + return ds + def integrate_midpoints(self, variable, *, dims=None, cumulative_t=False): """ Integrate using the midpoint rule for spatial dimensions, and trapezium rule for diff --git a/xbout/geometries.py b/xbout/geometries.py index 6774c4fa..1a93e38c 100644 --- a/xbout/geometries.py +++ b/xbout/geometries.py @@ -7,6 +7,7 @@ from .region import Region, _create_regions_toroidal, _create_single_region from .utils import ( _add_attrs_to_var, + _make_1d_xcoord, _set_attrs_on_all_vars, _set_as_coord, _1d_coord_from_spacing, @@ -139,21 +140,7 @@ def apply_geometry(ds, geometry_name, *, coordinates=None, grid=None): updated_ds = updated_ds.drop_vars("t_array") if xcoord not in updated_ds.coords: - # Make index 'x' a coordinate, useful for handling global indexing - # Note we have to use the index value, not the value calculated from 'dx' because - # 'dx' may not be consistent between different regions (e.g. core and PFR). - # For some geometries xcoord may have already been created by - # add_geometry_coords, in which case we do not need this. 
- nx = updated_ds.dims[xcoord] - - # can't use commented out version, uncommented one works around xarray bug - # removing attrs - # https://github.com/pydata/xarray/issues/4415 - # https://github.com/pydata/xarray/issues/4393 - # updated_ds = updated_ds.assign_coords(**{xcoord: np.arange(nx)}) - updated_ds[xcoord] = (xcoord, np.arange(nx)) - - _add_attrs_to_var(updated_ds, xcoord) + _make_1d_xcoord(updated_ds) if ycoord not in updated_ds.coords: ny = updated_ds.dims[ycoord] diff --git a/xbout/tests/test_utils.py b/xbout/tests/test_utils.py index a7c9b8e1..b5255bcc 100644 --- a/xbout/tests/test_utils.py +++ b/xbout/tests/test_utils.py @@ -6,7 +6,7 @@ from xbout.utils import ( _set_attrs_on_all_vars, - _update_metadata_increased_resolution, + _update_metadata_increased_y_resolution, _1d_coord_from_spacing, ) @@ -54,7 +54,7 @@ def test__update_metadata_increased_resolution(self): "MYSUB": 7, } - da = _update_metadata_increased_resolution(da, 3) + da = _update_metadata_increased_y_resolution(da, n=3) assert da.metadata["jyseps1_1"] == 5 assert da.metadata["jyseps2_1"] == 8 diff --git a/xbout/utils.py b/xbout/utils.py index 3c67723d..e9154d02 100644 --- a/xbout/utils.py +++ b/xbout/utils.py @@ -81,7 +81,46 @@ def _separate_metadata(ds): return ds.drop_vars(scalar_vars), metadata -def _update_metadata_increased_resolution(da, n): +def _update_metadata_increased_x_resolution(da, *, ixseps1=None, ixseps2=None, nx=None): + """ + Update the metadata variables to account for a change in x-direction resolution. + + Parameters + ---------- + da : DataArray + The variable to update + ixseps1 : int + The value to give to ixseps1 + ixseps2 : int + The value to give to ixseps2 + nx : int + The value to give to nx + """ + + # Take deepcopy to ensure we do not alter metadata of other variables + da.attrs["metadata"] = deepcopy(da.metadata) + + def set_var(var, value): + if value is None: + da[var] = -1 + else: + da[var] = value + + set_var("ixseps1", ixseps1) + set_var("ixseps2", ixseps2) + set_var("nx", nx) + if nx is not None: + da.metadata["MXSUB"] = nx - 2 * da.metadata["MXG"] + + # Update attrs of coordinates to be consistent with da + for coord in da.coords: + da[coord].attrs = {} + _add_attrs_to_var(da, coord) + + return da + + +def _update_metadata_increased_y_resolution(da, n): """ Update the metadata variables to account for a y-direction resolution increased by a factor n. @@ -195,6 +234,25 @@ def _1d_coord_from_spacing(spacing, dim, ds=None, *, origin_at=None): return xr.Variable(dim, coord_values) +def _make_1d_xcoord(ds_or_da): + # Make index 'x' a coordinate, useful for handling global indexing + # Note we have to use the index value, not the value calculated from 'dx' because + # 'dx' may not be consistent between different regions (e.g. core and PFR). + # For some geometries xcoord may have already been created by + # add_geometry_coords, in which case we do not need this. 
+ xcoord = ds_or_da.metadata["bout_xdim"] + nx = ds_or_da.dims[xcoord] + + # can't use commented out version, uncommented one works around xarray bug + # removing attrs + # https://github.com/pydata/xarray/issues/4415 + # https://github.com/pydata/xarray/issues/4393 + # updated_ds = updated_ds.assign_coords(**{xcoord: np.arange(nx)}) + ds_or_da[xcoord] = (xcoord, np.arange(nx)) + + _add_attrs_to_var(ds_or_da, xcoord) + + def _check_new_nxpe(ds, nxpe): # Check nxpe is valid From 86b0fd7081c53fee8936a67439f320e09f034ffc Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 23 Dec 2021 21:44:21 +0000 Subject: [PATCH 02/12] Allow parallel interpolation to be calculated using poloidal distance --- xbout/boutdataarray.py | 164 ++++++++++++++++++++++++++++++----------- xbout/utils.py | 41 ++++++++--- 2 files changed, 150 insertions(+), 55 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 914b6b9d..baf8a6f6 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -241,6 +241,8 @@ def interpolate_parallel( self, region=None, *, + poloidal_distance=None, + dy=None, n=None, toroidal_points=None, method="cubic", @@ -256,9 +258,18 @@ def interpolate_parallel( By default, return a result with all regions interpolated separately and then combined. If an explicit region argument is passed, then return the variable from only that region. + poloidal_distance : 2d array, optional + Poloidal distance values to interpolate to - interpolation is calculated as + a function of poloidal distance along psi contours. Should have the same + radial grid size as the input. If not given, `n` is used instead. + dy : 2d array, optional + New values of `dy`, corresponding to the values of `poloidal_distance`. + Required if `poloidal_distance` is passed. n : int, optional The factor to increase the resolution by. Defaults to the value set by BoutDataset.setupParallelInterp(), or 10 if that has not been called. + If `n` is used, interpolation is onto a linearly spaced grid in grid-index + space. toroidal_points : int or sequence of int, optional If int, number of toroidal points to output, applies a stride to toroidal direction to save memory usage. 
If sequence of int, the indexes of toroidal @@ -280,11 +291,28 @@ def interpolate_parallel( if region is None: # Call the single-region version of this method for each region, and combine # the results together + if poloidal_distance is None: + poloidal_distance_parts = [None for _ in self._regions] + else: + poloidal_distance_parts = [ + poloidal_distance.from_region( + region, with_guards={xcoord: 2, ycoord: 0} + ) + .isel({ycoord: 0}, drop=True) + .data + for region in self._regions + ] parts = [ self.interpolate_parallel( - region, n=n, toroidal_points=toroidal_points, method=method + region, + poloidal_distance=this_poloidal_distance, + n=n, + toroidal_points=toroidal_points, + method=method, ).bout.to_dataset() - for region in self._regions + for (region, this_poloidal_distance) in zip( + self._regions, poloidal_distance_parts + ) ] # 'region' is not the same for all parts, and should not exist in the result, @@ -320,57 +348,107 @@ def interpolate_parallel( else: aligned_input = True - if n is None: - n = self.fine_interpolation_factor + if poloidal_distance is not None: + poloidal_distance = poloidal_distance.copy() + # Need to delete ycoord to avoid a clash below + del poloidal_distance[ycoord] - da = da.chunk({ycoord: None}) + if n is not None: + raise ValueError( + f"poloidal_distance and n cannot both be passed, got " + f"poloidal_distance={poloidal_distance} and n={n}" + ) + if dy is None: + raise ValueError() + + sizes = {k: v for (k, v) in da.sizes.items()} + sizes[ycoord] = poloidal_distance.sizes[ycoord] + result = xr.DataArray(np.zeros([sizes[d] for d in da.dims]), dims=da.dims) + result["poloidal_distance"] = poloidal_distance.drop_vars("x") + for coord in da.coords: + if ycoord not in da[coord].dims: + result[coord] = da[coord] + + for ix in range(da.sizes[xcoord]): + xsel = {xcoord: da[xcoord][ix].values} + # Need to make psi_poloidal a dimension coordinate in order to + # interpolate using it + result[ycoord] = result["poloidal_distance"].isel(xsel, drop=True).data + da[ycoord] = da["poloidal_distance"].isel(xsel, drop=True).data + result.loc[xsel] = ( + da.isel(xsel) + .interp( + {ycoord: poloidal_distance.isel(xsel)}, + assume_sorted=True, + method=method, + kwargs={"fill_value": "extrapolate"}, + ) + .data + ) - ny_fine = n * region.ny - dy = (region.yupper - region.ylower) / ny_fine + result.attrs = da.attrs.copy() + da = result - myg = da.metadata["MYG"] - if da.metadata["keep_yboundaries"] and region.connection_lower_y is None: - ybndry_lower = myg - else: - ybndry_lower = 0 - if da.metadata["keep_yboundaries"] and region.connection_upper_y is None: - ybndry_upper = myg + if dy is None: + raise ValueError( + "It is required to pass dy if poloidal_distance is passed" + ) + + da = _update_metadata_increased_y_resolution(da) + + da["dy"] = dy else: - ybndry_upper = 0 + if n is None: + n = self.fine_interpolation_factor - y_fine = np.linspace( - region.ylower - (ybndry_lower - 0.5) * dy, - region.yupper + (ybndry_upper - 0.5) * dy, - ny_fine + ybndry_lower + ybndry_upper, - ) + da = da.chunk({ycoord: None}) - # This prevents da.interp() from being very slow. - # Apparently large attrs (i.e. regions) on a coordinate which is passed as an - # argument to dask.array.map_blocks() slow things down, maybe because coordinates - # are numpy arrays, not dask arrays? 
- # Slow-down was introduced in d062fa9e75c02fbfdd46e5d1104b9b12f034448f when - # _add_attrs_to_var(updated_ds, ycoord) was added in geometries.py - da[ycoord].attrs = {} + ny_fine = n * region.ny + dy = (region.yupper - region.ylower) / ny_fine - da = da.interp( - {ycoord: y_fine.data}, - assume_sorted=True, - method=method, - kwargs={"fill_value": "extrapolate"}, - ) + myg = da.metadata["MYG"] + if da.metadata["keep_yboundaries"] and region.connection_lower_y is None: + ybndry_lower = myg + else: + ybndry_lower = 0 + if da.metadata["keep_yboundaries"] and region.connection_upper_y is None: + ybndry_upper = myg + else: + ybndry_upper = 0 - da = _update_metadata_increased_y_resolution(da, n) + y_fine = np.linspace( + region.ylower - (ybndry_lower - 0.5) * dy, + region.yupper + (ybndry_upper - 0.5) * dy, + ny_fine + ybndry_lower + ybndry_upper, + ) - # Modify dy to be consistent with the higher resolution grid - dy_array = xr.DataArray( - np.full([da.sizes[xcoord], da.sizes[ycoord]], dy), dims=[xcoord, ycoord] - ) - da["dy"] = da["dy"].copy(data=dy_array) + # This prevents da.interp() from being very slow. + # Apparently large attrs (i.e. regions) on a coordinate which is passed as an + # argument to dask.array.map_blocks() slow things down, maybe because coordinates + # are numpy arrays, not dask arrays? + # Slow-down was introduced in d062fa9e75c02fbfdd46e5d1104b9b12f034448f when + # _add_attrs_to_var(updated_ds, ycoord) was added in geometries.py + da[ycoord].attrs = {} + + da = da.interp( + {ycoord: y_fine.data}, + assume_sorted=True, + method=method, + kwargs={"fill_value": "extrapolate"}, + ) - # Remove regions which have incorrect information for the high-resolution grid. - # New regions will be generated when creating a new Dataset in - # BoutDataset.getHighParallelResVars - del da.attrs["regions"] + da = _update_metadata_increased_y_resolution(da, n=n) + + # Modify dy to be consistent with the higher resolution grid + dy_array = xr.DataArray( + np.full([da.sizes[xcoord], da.sizes[ycoord]], dy), dims=[xcoord, ycoord] + ) + da["dy"] = da["dy"].copy(data=dy_array) + + # Remove regions which have incorrect information for the high-resolution grid. + # New regions will be generated when creating a new Dataset in + # BoutDataset.getHighParallelResVars + del da.attrs["regions"] if not aligned_input: # Want output in non-aligned coordinates diff --git a/xbout/utils.py b/xbout/utils.py index e9154d02..18224a4d 100644 --- a/xbout/utils.py +++ b/xbout/utils.py @@ -120,7 +120,17 @@ def set_var(var, value): return da -def _update_metadata_increased_y_resolution(da, n): +def _update_metadata_increased_y_resolution( + da, + *, + n=None, + jyseps1_1=None, + jyseps2_1=None, + jyseps1_2=None, + jyseps2_2=None, + ny_inner=None, + ny=None, +): """ Update the metadata variables to account for a y-direction resolution increased by a factor n. @@ -129,8 +139,9 @@ def _update_metadata_increased_y_resolution(da, n): ---------- da : DataArray The variable to update - n : int - The factor to increase the y-resolution by + n : int, optional + The factor to increase the y-resolution by. If n is not given, y-dependent + metadata variables are set to -1, assuming they will be corrected later. 
""" # Take deepcopy to ensure we do not alter metadata of other variables @@ -139,19 +150,25 @@ def _update_metadata_increased_y_resolution(da, n): def update_jyseps(name): # If any jyseps<=0, need to leave as is if da.metadata[name] > 0: - da.metadata[name] = n * (da.metadata[name] + 1) - 1 + if n is None: + da.metadata[name] = -1 + else: + da.metadata[name] = n * (da.metadata[name] + 1) - 1 - update_jyseps("jyseps1_1") - update_jyseps("jyseps2_1") - update_jyseps("jyseps1_2") - update_jyseps("jyseps2_2") + update_jyseps("jyseps1_1", jyseps1_1) + update_jyseps("jyseps2_1", jyseps2_1) + update_jyseps("jyseps1_2", jyseps1_2) + update_jyseps("jyseps2_2", jyseps2_2) def update_ny(name): - da.metadata[name] = n * da.metadata[name] + if n is None: + da.metadata[name] = -1 + else: + da.metadata[name] = n * da.metadata[name] - update_ny("ny") - update_ny("ny_inner") - update_ny("MYSUB") + update_ny("ny", ny) + update_ny("ny_inner", ny_inner) + update_ny("MYSUB", None) # Update attrs of coordinates to be consistent with da for coord in da.coords: From ce7aa324a821ba2a6b73cd8aa2aea18b3d87773a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 23 Dec 2021 22:32:30 +0000 Subject: [PATCH 03/12] Method to interpolate variables onto grid given by new grid file The new grid file should represent the same equilibrium as the original grid. --- xbout/boutdataarray.py | 146 ++++++++++++++++++++++++++++++++++++++--- xbout/boutdataset.py | 109 ++++++++++++++++++++++++++++++ xbout/utils.py | 20 ++++-- 3 files changed, 263 insertions(+), 12 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index baf8a6f6..ff30cc57 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -11,6 +11,7 @@ from xarray import register_dataarray_accessor from .geometries import apply_geometry +from .load import open_boutdataset from .plotting.animate import animate_poloidal, animate_pcolormesh, animate_line from .plotting import plotfuncs from .plotting.utils import _create_norm @@ -257,7 +258,8 @@ def interpolate_parallel( region : str, optional By default, return a result with all regions interpolated separately and then combined. If an explicit region argument is passed, then return the variable - from only that region. + from only that region. If the DataArray has already been restricted to a + single region, pass `region=False` to skip calling `from_region()` again. poloidal_distance : 2d array, optional Poloidal distance values to interpolate to - interpolation is calculated as a function of poloidal distance along psi contours. Should have the same @@ -332,15 +334,16 @@ def interpolate_parallel( result = apply_geometry(result, self.data.geometry) return result[self.data.name] - # Select a particular 'region' and interpolate to higher parallel resolution - da = self.data - region = da.bout._regions[region] + da = self.data.copy() tcoord = da.metadata["bout_tdim"] xcoord = da.metadata["bout_xdim"] ycoord = da.metadata["bout_ydim"] zcoord = da.metadata["bout_zdim"] - da = da.bout.from_region(region.name, with_guards={xcoord: 0, ycoord: 2}) + if region is not False: + # Select a particular 'region' and interpolate to higher parallel resolution + region = da.bout._regions[region] + da = da.bout.from_region(region.name, with_guards={xcoord: 0, ycoord: 2}) if zcoord in da.dims and da.direction_y != "Aligned": aligned_input = False @@ -483,7 +486,8 @@ def interpolate_radial( region : str, optional By default, return a result with all regions interpolated separately and then combined. 
If an explicit region argument is passed, then return the variable - from only that region. + from only that region. If the DataArray has already been restricted to a + single region, pass `region=False` to skip calling `from_region()` again. psi : 1d or 2d array, optional Values of `psixy` to interpolate data to. If not given use `n` instead. If `psi` is given, it must be a 1d array with psi values for the region if @@ -555,10 +559,13 @@ def interpolate_radial( # ixseps2, which are needed to create the 'regions'. return result[self.data.name] - # Select a particular 'region' and interpolate to higher parallel resolution da = self.data - da = da.bout.from_region(region.name, with_guards={xcoord: 2, ycoord: 0}) + if region is not False: + # Select a particular 'region' and interpolate to higher parallel resolution + region = da.bout._regions[region] + da = da.bout.from_region(region.name, with_guards={xcoord: 2, ycoord: 0}) + da = da.chunk({xcoord: None}) old_psi = da["psi_poloidal"].isel({ycoord: 0}, drop=True).values @@ -628,6 +635,129 @@ def interpolate_radial( return da + def interpolate_to_new_grid( + self, new_gridfile, *, method="cubic", return_dataset=False + ): + """ + Interpolate the DataArray onto a new set of grid points, given by a grid file. + + The grid file is asssumed to represent the same equilibrium as the one + associated by the original DataArray, so that psi-values and poloidal distances + along psi-contours of the equilibrium are the same. + + Parameters + ---------- + new_gridfile : str, pathlib.Path or Dataset + Path to a new grid file, or grid file opened as a Dataset. + method : str, optional + The interpolation method to use. Options from xarray.DataArray.interp(), + currently: linear, nearest, zero, slinear, quadratic, cubic. Default is + 'cubic'. + return_dataset : bool, default False + Return the result as a Dataset containing the new DataArray. + """ + if not isinstance(new_gridfile, xr.Dataset): + new_gridfile = open_boutdataset( + new_gridfile, + keep_xboundaries=da.metadata["keep_xboundaries"], + keep_yboundaries=da.metadata["keep_yboundaries"], + drop_variables=["theta"], + info=False, + ) + new_gridfile = apply_geometry(new_gridfile, self.data.geometry) + + xcoord = self.data.metadata["bout_xdim"] + ycoord = self.data.metadata["bout_ydim"] + + da = self.data + + parts = [] + for region in self._regions: + # Note, need to set 0 x-guards here. If we include x-guards in the radial + # interpolation, poloidal_distance gets messed up at the edges for the + # parallel interpolation because poloidal_distance does not have to be + # consistent between different regions. + part = da.bout.from_region(region, with_guards={xcoord: 0, ycoord: 2}) + + # Radial interpolation first, because the psi coordinate is 1d (in each + # region), so does not need to be interpolated in y-direction, whereas + # poloidal_distance would need to be interpolated to the original + # DataArray's radial grid points. 
+ psi_part = ( + new_gridfile["psi_poloidal"] + .bout.from_region(region, with_guards={xcoord: 0, ycoord: 0}) + .isel({ycoord: 0}, drop=True) + ) + dx_part = ( + new_gridfile["dx"] + .bout.from_region(region, with_guards={xcoord: 0, ycoord: 0}) + .isel({ycoord: 0}, drop=True) + ) + + part = part.bout.interpolate_radial( + False, + psi=psi_part, + dx=dx_part, + method=method, + return_dataset=return_dataset, + ) + + poloidal_distance_part = new_gridfile["poloidal_distance"].bout.from_region( + region, with_guards={xcoord: 0, ycoord: 0} + ) + dy_part = new_gridfile["dy"].bout.from_region( + region, with_guards={xcoord: 0, ycoord: 0} + ) + + part = part.bout.interpolate_parallel( + False, + poloidal_distance=poloidal_distance_part, + dy=dy_part, + method=method, + return_dataset=return_dataset, + ) + + # Get theta coordinate from new_gridfile, as interpolated versions may not + # be consistent between different regions. + part["theta"] = poloidal_distance_part["theta"] + + # 'region' is not the same for all parts, and should not exist in the result, + # so delete + if "region" in part.attrs: + del part.attrs["region"] + + parts.append(part.to_dataset(name=self.data.name)) + + result = xr.combine_by_coords(parts, combine_attrs="drop_conflicts") + + # Get attributes from original DataArray, then update for increased resolution + result.attrs = self.data.attrs + + result = _update_metadata_increased_x_resolution( + result, + ixseps1=new_gridfile.metadata["ixseps1"], + ixseps2=new_gridfile.metadata["ixseps2"], + nx=new_gridfile.metadata["nx"], + ) + result = _update_metadata_increased_y_resolution( + result, + jyseps1_1=new_gridfile.metadata["jyseps1_1"], + jyseps2_1=new_gridfile.metadata["jyseps2_1"], + jyseps1_2=new_gridfile.metadata["jyseps1_2"], + jyseps2_2=new_gridfile.metadata["jyseps2_2"], + ny_inner=new_gridfile.metadata["ny_inner"], + ny=new_gridfile.metadata["ny"], + ) + + _make_1d_xcoord(result) + + if return_dataset: + return result + else: + # Extract the DataArray to return + result = apply_geometry(result, self.data.geometry) + return result[self.data.name] + def remove_yboundaries(self, return_dataset=False, remove_extra_upper=False): """ Remove y-boundary points, if present, from the DataArray diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index e8b2868d..06136384 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -18,6 +18,7 @@ from dask.diagnostics import ProgressBar from .geometries import apply_geometry +from .load import open_boutdataset from .plotting.animate import ( animate_poloidal, animate_pcolormesh, @@ -349,6 +350,114 @@ def find_with_dims(first_var, dims): return ds + def interpolate_to_new_grid(self, variables, new_gridfile, **kwargs): + """ + Interpolate the DataSet onto a new set of grid points, given by a grid file. + + The grid file is asssumed to represent the same equilibrium as the one + associated by the original DataSet, so that psi-values and poloidal distances + along psi-contours of the equilibrium are the same. + + Parameters + ---------- + variables : str or sequence of str or ... + The names of the variables to interpolate. If 'variables=...' is passed + explicitly, then interpolate all variables in the Dataset. + new_gridfile : str, pathlib.Path or Dataset + Path to a new grid file, or grid file opened as a Dataset. + method : str, optional + The interpolation method to use. Options from xarray.DataSet.interp(), + currently: linear, nearest, zero, slinear, quadratic, cubic. Default is + 'cubic'. 
+ + Returns + ------- + A new Dataset containing the variables interpolated to the new grid. The new + Dataset is a valid BoutDataset, although containing only the specified + variables. + """ + + print("Interpolating to new grid:") + + if variables is ...: + variables = [v for v in self.data] + + if not isinstance(new_gridfile, xr.Dataset): + new_gridfile = open_boutdataset( + new_gridfile, + keep_xboundaries=self.data.metadata["keep_xboundaries"], + keep_yboundaries=self.data.metadata["keep_yboundaries"], + drop_variables=["theta"], + info=False, + ) + new_gridfile = apply_geometry(new_gridfile, self.data.geometry) + + if isinstance(variables, str): + variables = [variables] + if isinstance(variables, tuple): + variables = list(variables) + + # Need to start with a Dataset with attrs as merge() drops the attrs of the + # passed-in argument. + # Make sure the first variable has all dimensions so we don't lose any + # coordinates + def find_with_dims(first_var, dims): + if first_var is None: + dims = set(dims) + for v in variables: + if set(self.data[v].dims) == dims: + first_var = v + break + return first_var + + tcoord = self.data.metadata.get("bout_tdim", "t") + zcoord = self.data.metadata.get("bout_zdim", "z") + first_var = find_with_dims(None, self.data.dims) + first_var = find_with_dims(first_var, set(self.data.dims) - set(tcoord)) + first_var = find_with_dims(first_var, set(self.data.dims) - set(zcoord)) + first_var = find_with_dims( + first_var, set(self.data.dims) - set([tcoord, zcoord]) + ) + if first_var is None: + raise ValueError( + f"Could not find variable to interpolate with both " + f"{ds.metadata.get('bout_xdim', 'x')} and " + f"{ds.metadata.get('bout_ydim', 'y')} dimensions" + ) + variables.remove(first_var) + print(first_var) + ds = self.data[first_var].bout.interpolate_to_new_grid( + new_gridfile, return_dataset=True, **kwargs + ) + xcoord = ds.metadata.get("bout_xdim", "x") + ycoord = ds.metadata.get("bout_ydim", "y") + for var in variables: + print(var) + da = self.data[var] + if xcoord in da.dims and ycoord in da.dims: + ds = ds.merge( + da.bout.interpolate_to_new_grid( + new_gridfile, return_dataset=True, **kwargs + ) + ) + elif xcoord in da.dims: + print( + f"{var} depends on x but not y, so do not know how to interpolate " + f"to new grid" + ) + elif ycoord in da.dims: + print( + f"{var} depends on y but not x, so do not know how to interpolate " + f"to new grid" + ) + else: + ds[var] = da + + # Apply geometry + ds = apply_geometry(ds, ds.geometry) + + return ds + def integrate_midpoints(self, variable, *, dims=None, cumulative_t=False): """ Integrate using the midpoint rule for spatial dimensions, and trapezium rule for diff --git a/xbout/utils.py b/xbout/utils.py index 18224a4d..ad734cb8 100644 --- a/xbout/utils.py +++ b/xbout/utils.py @@ -142,17 +142,24 @@ def _update_metadata_increased_y_resolution( n : int, optional The factor to increase the y-resolution by. If n is not given, y-dependent metadata variables are set to -1, assuming they will be corrected later. + jyseps1_1, jyseps2_1, jyseps1_2, jyseps2_2, ny_inner, ny : int + Metadata variables for y-grid. Should not be passed if `n` is passed. 
""" # Take deepcopy to ensure we do not alter metadata of other variables da.attrs["metadata"] = deepcopy(da.metadata) - def update_jyseps(name): + def update_jyseps(name, value): # If any jyseps<=0, need to leave as is if da.metadata[name] > 0: if n is None: - da.metadata[name] = -1 + if value is None: + da.metadata[name] = -1 + else: + da.metadata[name] = value else: + if value is not None: + raise ValueError(f"n set, but value also passed to {name}") da.metadata[name] = n * (da.metadata[name] + 1) - 1 update_jyseps("jyseps1_1", jyseps1_1) @@ -160,10 +167,15 @@ def update_jyseps(name): update_jyseps("jyseps1_2", jyseps1_2) update_jyseps("jyseps2_2", jyseps2_2) - def update_ny(name): + def update_ny(name, value): if n is None: - da.metadata[name] = -1 + if value is None: + da.metadata[name] = -1 + else: + da.metadata[name] = value else: + if value is not None: + raise ValueError(f"n set, but value also passed to {name}") da.metadata[name] = n * da.metadata[name] update_ny("ny", ny) From d4def6ae86a1348d1108c524530456b1e8888d86 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 24 Dec 2021 18:34:12 +0000 Subject: [PATCH 04/12] Use xr.apply_unfunc() to speed up interpolate_parallel When using poloidal_distance for parallel interpolation, calculation was previously very slow due to explicit loop over x. Using apply_ufunc() instead helps. --- xbout/boutdataarray.py | 83 ++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index ff30cc57..3b89938c 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -253,6 +253,10 @@ def interpolate_parallel( Interpolate in the parallel direction to get a higher resolution version of the variable. + Note: when using poloidal_distance for interpolation, have to convert to numpy + arrays for calculation. This means that dask cannot be used to parallelise this + calculation, so may be slow for large Datasets. + Parameters ---------- region : str, optional @@ -293,9 +297,13 @@ def interpolate_parallel( if region is None: # Call the single-region version of this method for each region, and combine # the results together + + # apply_unfunc of scipy.interp1d() fails if data is a dask array + self.data.load() if poloidal_distance is None: poloidal_distance_parts = [None for _ in self._regions] else: + poloidal_distance.load() poloidal_distance_parts = [ poloidal_distance.from_region( region, with_guards={xcoord: 2, ycoord: 0} @@ -352,7 +360,14 @@ def interpolate_parallel( aligned_input = True if poloidal_distance is not None: + # apply_unfunc of scipy.interp1d() fails if data is a dask array + da.load() + poloidal_distance.load() + poloidal_distance = poloidal_distance.copy() + # Need to delete xcoord 'indexer', because it is not present on 'result', so + # would cause an error in apply_ufunc() if it was present. 
+ del poloidal_distance[xcoord] # Need to delete ycoord to avoid a clash below del poloidal_distance[ycoord] @@ -364,30 +379,39 @@ def interpolate_parallel( if dy is None: raise ValueError() - sizes = {k: v for (k, v) in da.sizes.items()} - sizes[ycoord] = poloidal_distance.sizes[ycoord] - result = xr.DataArray(np.zeros([sizes[d] for d in da.dims]), dims=da.dims) - result["poloidal_distance"] = poloidal_distance.drop_vars("x") - for coord in da.coords: - if ycoord not in da[coord].dims: - result[coord] = da[coord] - - for ix in range(da.sizes[xcoord]): - xsel = {xcoord: da[xcoord][ix].values} - # Need to make psi_poloidal a dimension coordinate in order to - # interpolate using it - result[ycoord] = result["poloidal_distance"].isel(xsel, drop=True).data - da[ycoord] = da["poloidal_distance"].isel(xsel, drop=True).data - result.loc[xsel] = ( - da.isel(xsel) - .interp( - {ycoord: poloidal_distance.isel(xsel)}, - assume_sorted=True, - method=method, - kwargs={"fill_value": "extrapolate"}, - ) - .data + from scipy.interpolate import interp1d + + def y_interp_func( + data, poloidal_distance_in, poloidal_distance_out, method=None + ): + interp_func = interp1d( + poloidal_distance_in, data, kind=method, assume_sorted=True ) + return interp_func(poloidal_distance_out) + + # Need to give different name to output dimension to avoid clash + new_ycoord = ycoord + "_interpolate_to_new_grid_new_ycoord" + poloidal_distance = poloidal_distance.rename({ycoord: new_ycoord}) + result = xr.apply_ufunc( + y_interp_func, + da, + da["poloidal_distance"], + poloidal_distance, + method, + input_core_dims=[[ycoord], [ycoord], [new_ycoord], []], + output_core_dims=[[new_ycoord]], + exclude_dims=set([ycoord]), + vectorize=True, + dask="parallelized", + dask_gufunc_kwargs={ + "output_sizes": {new_ycoord: poloidal_distance.sizes[new_ycoord]} + }, + ) + # Rename new_ycoord back to ycoord for output + result = result.rename({new_ycoord: ycoord}) + + # Transpose to original dimension order + result = result.transpose(*da.dims) result.attrs = da.attrs.copy() da = result @@ -645,6 +669,11 @@ def interpolate_to_new_grid( associated by the original DataArray, so that psi-values and poloidal distances along psi-contours of the equilibrium are the same. + Note: poloidal_distance is used for parallel interpolation inside this method. + For this, have to convert to numpy arrays for calculation. Means that dask + cannot be used to parallelise that part of the calculation, so this method may + be slow for large Datasets. + Parameters ---------- new_gridfile : str, pathlib.Path or Dataset @@ -671,6 +700,10 @@ def interpolate_to_new_grid( da = self.data + # apply_unfunc() of scipy.interp1d() fails with dask arrays, so load + da.load() + new_gridfile["poloidal_distance"].load() + parts = [] for region in self._regions: # Note, need to set 0 x-guards here. 
If we include x-guards in the radial @@ -709,6 +742,10 @@ def interpolate_to_new_grid( region, with_guards={xcoord: 0, ycoord: 0} ) + # apply_unfunc() of scipy.interp1d() fails with dask arrays, so load + part.load() + poloidal_distance_part.load() + part = part.bout.interpolate_parallel( False, poloidal_distance=poloidal_distance_part, From c95155a8e5f0295e7c7e4fe459cea826a39bd4f3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 24 Dec 2021 23:48:45 +0000 Subject: [PATCH 05/12] PEP8 fixes --- xbout/boutdataarray.py | 31 ++++++++++++++++--------------- xbout/boutdataset.py | 3 ++- xbout/region.py | 8 ++++---- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 3b89938c..012070f2 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -450,9 +450,9 @@ def y_interp_func( ) # This prevents da.interp() from being very slow. - # Apparently large attrs (i.e. regions) on a coordinate which is passed as an - # argument to dask.array.map_blocks() slow things down, maybe because coordinates - # are numpy arrays, not dask arrays? + # Apparently large attrs (i.e. regions) on a coordinate which is passed as + # an argument to dask.array.map_blocks() slow things down, maybe because + # coordinates are numpy arrays, not dask arrays? # Slow-down was introduced in d062fa9e75c02fbfdd46e5d1104b9b12f034448f when # _add_attrs_to_var(updated_ds, ycoord) was added in geometries.py da[ycoord].attrs = {} @@ -472,8 +472,8 @@ def y_interp_func( ) da["dy"] = da["dy"].copy(data=dy_array) - # Remove regions which have incorrect information for the high-resolution grid. - # New regions will be generated when creating a new Dataset in + # Remove regions which have incorrect information for the high-resolution + # grid. New regions will be generated when creating a new Dataset in # BoutDataset.getHighParallelResVars del da.attrs["regions"] @@ -508,10 +508,11 @@ def interpolate_radial( Parameters ---------- region : str, optional - By default, return a result with all regions interpolated separately and then - combined. If an explicit region argument is passed, then return the variable - from only that region. If the DataArray has already been restricted to a - single region, pass `region=False` to skip calling `from_region()` again. + By default, return a result with all regions interpolated separately and + then combined. If an explicit region argument is passed, then return the + variable from only that region. If the DataArray has already been restricted + to a single region, pass `region=False` to skip calling `from_region()` + again. psi : 1d or 2d array, optional Values of `psixy` to interpolate data to. If not given use `n` instead. If `psi` is given, it must be a 1d array with psi values for the region if @@ -527,8 +528,8 @@ def interpolate_radial( currently: linear, nearest, zero, slinear, quadratic, cubic. Default is 'cubic'. return_dataset : bool, optional - If this is set to True, return a Dataset containing this variable as a member - (by default returns a DataArray). Only used when region=None. + If this is set to True, return a Dataset containing this variable as a + member (by default returns a DataArray). Only used when region=None. 
Returns ------- @@ -563,8 +564,8 @@ def interpolate_radial( for (region, this_psi) in zip(self._regions, psi_parts) ] - # 'region' is not the same for all parts, and should not exist in the result, - # so delete before merging + # 'region' is not the same for all parts, and should not exist in the + # result, so delete before merging for part in parts: if "region" in part.attrs: del part.attrs["region"] @@ -758,8 +759,8 @@ def interpolate_to_new_grid( # be consistent between different regions. part["theta"] = poloidal_distance_part["theta"] - # 'region' is not the same for all parts, and should not exist in the result, - # so delete + # 'region' is not the same for all parts, and should not exist in the + # result, so delete if "region" in part.attrs: del part.attrs["region"] diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 06136384..83f0ce00 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -295,7 +295,8 @@ def interpolate_radial(self, variables, **kwargs): Returns ------- A new Dataset containing a high-resolution versions of the variables. The new - Dataset is a valid BoutDataset, although containing only the specified variables. + Dataset is a valid BoutDataset, although containing only the specified + variables. """ if variables is ...: diff --git a/xbout/region.py b/xbout/region.py index 26578f73..56634f3e 100644 --- a/xbout/region.py +++ b/xbout/region.py @@ -1576,8 +1576,8 @@ def _concat_lower_guards(da, da_global, mxg, myg): # poloidal_distance_ylow should be zero at the boundary of this region poloidal_distance_bottom = da["poloidal_distance_ylow"].isel({ycoord: 0}) if all(abs(poloidal_distance_bottom) < 1.0e-16): - # Offset so that the poloidal_distance in da_lower is continuous from the - # poloidal_distance in this region. + # Offset so that the poloidal_distance in da_lower is continuous from + # the poloidal_distance in this region. # Expect there to be y-boundary cells in the Dataset, this will probably # fail if there are not. total_poloidal_distance = da["total_poloidal_distance"] @@ -1694,8 +1694,8 @@ def _concat_upper_guards(da, da_global, mxg, myg): {ycoord: 0} ) if all(abs(poloidal_distance_bottom) < 1.0e-16): - # Offset so that the poloidal_distance in da_upper is continuous from the - # poloidal_distance in this region. + # Offset so that the poloidal_distance in da_upper is continuous from + # the poloidal_distance in this region. # Expect there to be y-boundary cells in the Dataset, this will probably # fail if there are not. total_poloidal_distance = da["total_poloidal_distance"] From 67ded49de1f24dcf5eb3d5c35c6a58f43283874d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 25 Dec 2021 01:38:58 +0000 Subject: [PATCH 06/12] Fix _update_metadata_increased_x_resolution() Need to set values in metadata, not directly in da. 
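For illustration, a rough sketch of the distinction this commit relies on (the
names and values below are made up, and the snippet is not part of the diff):
item assignment on a DataArray creates a coordinate, while the intended target
is the metadata dict stored in attrs.

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.zeros(4), dims="x")
    da.attrs["metadata"] = {"ixseps1": 3}

    # Bug: this adds a scalar coordinate named "ixseps1" to the DataArray
    da["ixseps1"] = 7

    # Fix: this updates the dict stored in da.attrs["metadata"], which is what
    # xBOUT reads back through attribute-style access as da.metadata
    da.metadata["ixseps1"] = 7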
--- xbout/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xbout/utils.py b/xbout/utils.py index ad734cb8..2675e7a7 100644 --- a/xbout/utils.py +++ b/xbout/utils.py @@ -102,9 +102,9 @@ def _update_metadata_increased_x_resolution(da, *, ixseps1=None, ixseps2=None, n def set_var(var, value): if value is None: - da[var] = -1 + da.metadata[var] = -1 else: - da[var] = value + da.metadata[var] = value set_var("ixseps1", ixseps1) set_var("ixseps2", ixseps2) From 491d89713234cbcd0b168404bbdd66595cf4b87c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 25 Dec 2021 12:49:17 +0000 Subject: [PATCH 07/12] Apply geometry in open_boutdataset call when loading new_gridfile MXG and MYG are now set correctly in _open_grid(), so can apply geometry just by passing the argument to open_boutdataset() now. --- xbout/boutdataarray.py | 2 +- xbout/boutdataset.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 012070f2..563c19a5 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -693,8 +693,8 @@ def interpolate_to_new_grid( keep_yboundaries=da.metadata["keep_yboundaries"], drop_variables=["theta"], info=False, + geometry=self.data.geometry, ) - new_gridfile = apply_geometry(new_gridfile, self.data.geometry) xcoord = self.data.metadata["bout_xdim"] ycoord = self.data.metadata["bout_ydim"] diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index 83f0ce00..dff09af1 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -390,8 +390,8 @@ def interpolate_to_new_grid(self, variables, new_gridfile, **kwargs): keep_yboundaries=self.data.metadata["keep_yboundaries"], drop_variables=["theta"], info=False, + geometry=self.data.geometry, ) - new_gridfile = apply_geometry(new_gridfile, self.data.geometry) if isinstance(variables, str): variables = [variables] From 98778893a2da34c1f211955987b82f33a6db2d88 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 25 Dec 2021 12:50:04 +0000 Subject: [PATCH 08/12] Work around NaN in corner cells of zShift When zShift is saved by BOUT++ it may have NaN in some corner boundary/guard cells. When doing FFTs of arrays, these NaNs can make the whole output NaN, even in entries that should not depend on the corner cells of zShift. To avoid this, replace the NaNs with 0 - nothing important should depend on the corner cell values, so this should not cause problems. --- xbout/boutdataarray.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 563c19a5..0886734a 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -158,7 +158,11 @@ def to_field_aligned(self): f"argument to open_boutdataset()?" ) - result = self._shift_z(self.data[zShift_coord]) + # zShift may have NaNs in the corners. These should not affect any useful + # results, but may cause parts or all of arrays to be filled with NaN, even + # where the entries should not depend on the NaN values. Replace NaN with 0 to + # avoid this. + result = self._shift_z(self.data[zShift_coord].fillna(0.0)) result.attrs["direction_y"] = "Aligned" return result @@ -190,7 +194,11 @@ def from_field_aligned(self): f"argument to open_boutdataset()?" ) - result = self._shift_z(-self.data[zShift_coord]) + # zShift may have NaNs in the corners. These should not affect any useful + # results, but may cause parts or all of arrays to be filled with NaN, even + # where the entries should not depend on the NaN values. 
Replace NaN with 0 to + # avoid this. + result = self._shift_z(-self.data[zShift_coord].fillna(0.0)) result.attrs["direction_y"] = "Standard" return result From de083320a3f6e81b0d0f1b985b51796c0d7956a1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 25 Dec 2021 00:29:33 +0000 Subject: [PATCH 09/12] Add option to do radial interpolation on field-aligned grid ... in interpolate_to_new_grid(). --- xbout/boutdataarray.py | 21 ++++++++++++++++++++- xbout/boutdataset.py | 7 +++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py index 0886734a..2ee9f134 100644 --- a/xbout/boutdataarray.py +++ b/xbout/boutdataarray.py @@ -669,7 +669,12 @@ def interpolate_radial( return da def interpolate_to_new_grid( - self, new_gridfile, *, method="cubic", return_dataset=False + self, + new_gridfile, + *, + field_aligned_radial_interpolation=False, + method="cubic", + return_dataset=False, ): """ Interpolate the DataArray onto a new set of grid points, given by a grid file. @@ -687,6 +692,13 @@ def interpolate_to_new_grid( ---------- new_gridfile : str, pathlib.Path or Dataset Path to a new grid file, or grid file opened as a Dataset. + field_aligned_radial_interpolation : bool, default False + If set to True, transform to field-aligned grid for radial interpolation + (parallel interpolation is always on field-aligned grid). Probably less + accurate, at least in some parts of the grid where integrated shear is high, + but may (especially if most of the turbulence is at the outboard midplane) + produce a result that is better field-aligned and so creates less of an + initial transient when restarting. method : str, optional The interpolation method to use. Options from xarray.DataArray.interp(), currently: linear, nearest, zero, slinear, quadratic, cubic. Default is @@ -706,6 +718,7 @@ def interpolate_to_new_grid( xcoord = self.data.metadata["bout_xdim"] ycoord = self.data.metadata["bout_ydim"] + zcoord = self.data.metadata["bout_zdim"] da = self.data @@ -736,6 +749,9 @@ def interpolate_to_new_grid( .isel({ycoord: 0}, drop=True) ) + if field_aligned_radial_interpolation and zcoord in part.dims: + part = part.bout.to_field_aligned() + part = part.bout.interpolate_radial( False, psi=psi_part, @@ -763,6 +779,9 @@ def interpolate_to_new_grid( return_dataset=return_dataset, ) + if field_aligned_radial_interpolation and zcoord in part.dims: + part = part.bout.from_field_aligned() + # Get theta coordinate from new_gridfile, as interpolated versions may not # be consistent between different regions. part["theta"] = poloidal_distance_part["theta"] diff --git a/xbout/boutdataset.py b/xbout/boutdataset.py index dff09af1..c128e061 100644 --- a/xbout/boutdataset.py +++ b/xbout/boutdataset.py @@ -366,6 +366,13 @@ def interpolate_to_new_grid(self, variables, new_gridfile, **kwargs): explicitly, then interpolate all variables in the Dataset. new_gridfile : str, pathlib.Path or Dataset Path to a new grid file, or grid file opened as a Dataset. + field_aligned_radial_interpolation : bool, default False + If set to True, transform to field-aligned grid for radial interpolation + (parallel interpolation is always on field-aligned grid). Probably less + accurate, at least in some parts of the grid where integrated shear is high, + but may (especially if most of the turbulence is at the outboard midplane) + produce a result that is better field-aligned and so creates less of an + initial transient when restarting. 
method : str, optional The interpolation method to use. Options from xarray.DataSet.interp(), currently: linear, nearest, zero, slinear, quadratic, cubic. Default is From f3f1667a990e35298b4878d3b024b896e4b00dcb Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 27 Dec 2021 18:33:22 +0000 Subject: [PATCH 10/12] Move lists of variables, attributes, etc. from BOUT++ into separate file Avoids circular dependencies when importing these lists into different files. --- xbout/bout_info.py | 16 ++++++++++++++++ xbout/load.py | 24 +++++------------------- 2 files changed, 21 insertions(+), 19 deletions(-) create mode 100644 xbout/bout_info.py diff --git a/xbout/bout_info.py b/xbout/bout_info.py new file mode 100644 index 00000000..5b2b8b41 --- /dev/null +++ b/xbout/bout_info.py @@ -0,0 +1,16 @@ +_BOUT_PER_PROC_VARIABLES = [ + "wall_time", + "wtime", + "wtime_rhs", + "wtime_invert", + "wtime_comms", + "wtime_io", + "wtime_per_rhs", + "wtime_per_rhs_e", + "wtime_per_rhs_i", + "PE_XIND", + "PE_YIND", + "MYPE", +] +_BOUT_PER_PROC_VARIABLES_REQUIRED_FROM_RESTARTS = ["hist_hi", "tt"] +_BOUT_TIME_DEPENDENT_META_VARS = ["iteration"] diff --git a/xbout/load.py b/xbout/load.py index e4e5d9b7..e4ea57bb 100644 --- a/xbout/load.py +++ b/xbout/load.py @@ -11,27 +11,13 @@ from natsort import natsorted from . import geometries +from .bout_info import ( + _BOUT_PER_PROC_VARIABLES, + _BOUT_PER_PROC_VARIABLES_REQUIRED_FROM_RESTARTS, + _BOUT_TIME_DEPENDENT_META_VARS, +) from .utils import _set_attrs_on_all_vars, _separate_metadata, _check_filetype, _is_path - -_BOUT_PER_PROC_VARIABLES = [ - "wall_time", - "wtime", - "wtime_rhs", - "wtime_invert", - "wtime_comms", - "wtime_io", - "wtime_per_rhs", - "wtime_per_rhs_e", - "wtime_per_rhs_i", - "PE_XIND", - "PE_YIND", - "MYPE", -] -_BOUT_PER_PROC_VARIABLES_REQUIRED_FROM_RESTARTS = ["hist_hi", "tt"] -_BOUT_TIME_DEPENDENT_META_VARS = ["iteration"] - - # This code should run whenever any function from this module is imported # Set all attrs to survive all mathematical operations # (see https://github.com/pydata/xarray/pull/2482) From 27f30b1cf18c91da094bf6c2ced03441b30ba5df Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 27 Dec 2021 18:34:17 +0000 Subject: [PATCH 11/12] Preserve attributes created by BOUT++ when creating restart files The attributes "cell_location", "direction_y", "direction_z", "bout_type" are written by BOUT++ for each variable, so should be preserved. 
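As a rough sketch of the intended filtering (the attrs dict below is
illustrative, not taken from a real dump file):

    from xbout.bout_info import _BOUT_VARIABLE_ATTRIBUTES

    attrs = {
        "cell_location": "CELL_CENTRE",  # written by BOUT++, keep
        "direction_y": "Standard",       # written by BOUT++, keep
        "metadata": {"nx": 68},          # added by xBOUT, drop from restarts
    }
    kept = {k: v for k, v in attrs.items() if k in _BOUT_VARIABLE_ATTRIBUTES}
    # kept == {"cell_location": "CELL_CENTRE", "direction_y": "Standard"}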
---
 xbout/bout_info.py |  1 +
 xbout/utils.py     | 11 +++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/xbout/bout_info.py b/xbout/bout_info.py
index 5b2b8b41..24e91e39 100644
--- a/xbout/bout_info.py
+++ b/xbout/bout_info.py
@@ -14,3 +14,4 @@
 ]
 _BOUT_PER_PROC_VARIABLES_REQUIRED_FROM_RESTARTS = ["hist_hi", "tt"]
 _BOUT_TIME_DEPENDENT_META_VARS = ["iteration"]
+_BOUT_VARIABLE_ATTRIBUTES = ["cell_location", "direction_y", "direction_z", "bout_type"]
diff --git a/xbout/utils.py b/xbout/utils.py
index 2675e7a7..bd711da9 100644
--- a/xbout/utils.py
+++ b/xbout/utils.py
@@ -5,6 +5,8 @@
 import numpy as np
 import xarray as xr
 
+from .bout_info import _BOUT_VARIABLE_ATTRIBUTES
+
 
 def _set_attrs_on_all_vars(ds, key, attr_data, copy=False):
     ds.attrs[key] = attr_data
@@ -527,8 +529,13 @@ def _split_into_restarts(ds, variables, savepath, nxpe, nype, tind, prefix, over
 
             for v in variables:
                 data_variable = ds_slice[v].variable
 
-                # delete attrs so we don't try to save metadata dict to restart files
-                data_variable.attrs = {}
+                # delete attrs, except for those that were created by BOUT++, so we
+                # don't try to save metadata dict to restart files
+                data_variable.attrs = {
+                    k: v
+                    for k, v in data_variable.attrs.items()
+                    if k in _BOUT_VARIABLE_ATTRIBUTES
+                }
                 restart_ds[v] = data_variable
 
             for v in ds.metadata:

From 7beece4d39e4cf9b97dcd22343b0a35736f853b1 Mon Sep 17 00:00:00 2001
From: Mike Kryjak
Date: Fri, 17 Mar 2023 13:34:56 +0000
Subject: [PATCH 12/12] Fix metadata typo

---
 xbout/boutdataarray.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/xbout/boutdataarray.py b/xbout/boutdataarray.py
index 120bcaf7..9aea1d43 100644
--- a/xbout/boutdataarray.py
+++ b/xbout/boutdataarray.py
@@ -707,6 +707,9 @@ def interpolate_to_new_grid(
         return_dataset : bool, default False
             Return the result as a Dataset containing the new DataArray.
         """
+
+        da = self.data
+
         if not isinstance(new_gridfile, xr.Dataset):
             new_gridfile = open_boutdataset(
                 new_gridfile,
                 keep_xboundaries=da.metadata["keep_xboundaries"],
                 keep_yboundaries=da.metadata["keep_yboundaries"],
                 drop_variables=["theta"],
                 info=False,
                 geometry=self.data.geometry,
             )
 
-        xcoord = self.data.metadata["bout_xdim"]
-        ycoord = self.data.metadata["bout_ydim"]
-        zcoord = self.data.metadata["bout_zdim"]
-
-        da = self.data
+        xcoord = da.metadata["bout_xdim"]
+        ycoord = da.metadata["bout_ydim"]
+        zcoord = da.metadata["bout_zdim"]
 
         # apply_ufunc() of scipy.interp1d() fails with dask arrays, so load
        da.load()
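A rough usage sketch for the series as a whole (the file names, variable names
and resolution factor below are illustrative, not taken from the patches):

    from xbout import open_boutdataset

    ds = open_boutdataset(
        "data/BOUT.dmp.*.nc",
        gridfilepath="grid_old.nc",
        geometry="toroidal",
        keep_xboundaries=True,
        keep_yboundaries=True,
    )

    # Increase the radial resolution of selected variables by a factor of 8
    ds_fine = ds.bout.interpolate_radial(["n", "T"], n=8)

    # Interpolate all variables onto the grid of a refined grid file that
    # represents the same equilibrium as the original grid
    ds_new = ds.bout.interpolate_to_new_grid(..., "grid_new.nc", method="cubic")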