diff --git a/src/openeo_processes/arrays.py b/src/openeo_processes/arrays.py index fab7dffa..4c76b52a 100644 --- a/src/openeo_processes/arrays.py +++ b/src/openeo_processes/arrays.py @@ -923,8 +923,6 @@ def exec_xar(data, ignore_nodata=True, dimension=None): while (data_first.fillna(999) != data_first).any() and i < len(data_dim): data_first = data_first.fillna(data_dim[i]) i += 1 - else: - pass return data_first @staticmethod @@ -1030,8 +1028,6 @@ def exec_xar(data, ignore_nodata=True, dimension=None): while (data_last.fillna(999) != data_last).any() and i >= 0: data_last = data_last.fillna(data_dim[i]) i -= 1 - else: - pass return data_last @staticmethod diff --git a/src/openeo_processes/comparison.py b/src/openeo_processes/comparison.py index 7744bf83..a07e62d4 100644 --- a/src/openeo_processes/comparison.py +++ b/src/openeo_processes/comparison.py @@ -5,6 +5,7 @@ import xarray as xr from openeo_processes.utils import process from openeo_processes.utils import str2time +from openeo_processes.utils import keep_attrs # TODO: test if this works for different data types @@ -383,9 +384,9 @@ def exec_xar(x, y, delta=False, case_sensitive=True, reduce=False): # TODO: add Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. delta : float, optional Only applicable for comparing two arrays containing numbers. If this optional parameter is set to a @@ -430,7 +431,7 @@ def exec_xar(x, y, delta=False, case_sensitive=True, reduce=False): # TODO: add ar_eq = x_time == y_time # comparison of dates else: ar_eq = x == y - + ar_eq = keep_attrs(x, y, ar_eq) if reduce: return ar_eq.all() else: @@ -546,9 +547,9 @@ def exec_xar(x, y, delta=None, case_sensitive=True, reduce=False): # TODO: add Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. 
delta : float, optional Only applicable for comparing two arrays containing numbers. If this optional parameter is set to a @@ -687,7 +688,7 @@ def exec_xar(x, y, reduce=False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. y : xr.DataArray, integer, float Second operand. @@ -700,18 +701,17 @@ def exec_xar(x, y, reduce=False): xr.DataArray: : Returns True if `x` is strictly greater than `y`, None if any operand is None, otherwise False. """ - - ## x has to be a datacube, whereas y can be another datacube, an integer or a float + if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: gt_ar = x > y + gt_ar = keep_attrs(x, y, gt_ar) if reduce: return gt_ar.all() else: return gt_ar - else: - return False + return False @staticmethod def exec_da(): @@ -827,9 +827,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. @@ -843,14 +843,14 @@ def exec_xar(x, y, reduce = False): """ if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: gte_ar = ((x-y) >= 0) + gte_ar = keep_attrs(x, y, gte_ar) if reduce: return gte_ar.all() else: return gte_ar - else: - return False + return False @staticmethod def exec_da(): @@ -966,9 +966,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. 
@@ -980,17 +980,16 @@ def exec_xar(x, y, reduce = False): Returns True if `x` is strictly lower than `y`, None if any operand is None, otherwise False. """ - ## x has to be a datacube, whereas y can be another datacube, an integer or a float if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: lt_ar = x < y + lt_ar = keep_attrs(x, y, lt_ar) if reduce: return lt_ar.all() else: return lt_ar - else: - return False + return False @staticmethod def exec_da(): @@ -1106,9 +1105,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. @@ -1120,17 +1119,16 @@ def exec_xar(x, y, reduce = False): Returns True if `x` is strictly lower than or equal to `y`, None if any operand is None, otherwise False. 
""" - ## x has to be a datacube, whereas y can be another datacube, an integer or a float if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: lte_ar = x <= y + lte_ar = keep_attrs(x, y, lte_ar) if reduce: return lte_ar.all() else: return lte_ar - else: - return False + return False @staticmethod def exec_da(): @@ -1286,9 +1284,12 @@ def exec_xar(x, min, max, exclude_max=False, reduce=False): return False if exclude_max: - return xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lt.exec_xar(x, max, reduce=reduce)) + bet = xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lt.exec_xar(x, max, reduce=reduce)) else: - return xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lte.exec_xar(x, max, reduce=reduce)) + bet = xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lte.exec_xar(x, max, reduce=reduce)) + if isinstance(x, xr.DataArray): + bet.attrs = x.attrs + return bet @staticmethod def exec_da(): diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 17e1436d..14cdddf7 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -19,19 +19,10 @@ def odc_load_helper(odc_cube, params: Dict[str, Any]) -> xr.DataArray: """Helper method to load a xarray DataArray from ODC.""" datacube = odc_cube.load(**params) - # Set no-data values to nan - new_data_vars = {} + # Improve CPU and MEM USAGE for name, data_var in datacube.data_vars.items(): - no_data = data_var.attrs["nodata"] - new_attrs = data_var.attrs - new_attrs["nodata"] = np.nan - new_data_vars[name] = xr.DataArray( - data=data_var.where(data_var != no_data), - coords=data_var.coords, - dims=data_var.dims, - attrs=new_attrs, - ) - datacube = xr.Dataset(data_vars=new_data_vars, coords=datacube.coords, attrs=datacube.attrs) + datacube[name] = datacube[name].where(datacube[name] 
!= datacube[name].nodata) + datacube.attrs['nodata'] = np.nan # Convert to xr.DataArray # TODO: add conversion with multiple and custom dimensions @@ -313,19 +304,11 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): merge = merge.sortby(dimension) elif matching == len(cube1.coords): # all dimensions match if overlap_resolver is None: # no overlap resolver, so a new dimension is added - values = np.array([cube1.values, cube2.values]) - cubes = ["Cube01", "Cube02"] - coords = [cubes] - dimensions = ["cubes"] - for d in cube1.dims: - dimensions.append(d) - coords.append(cube1[d]) - merge = xr.DataArray(values, coords=coords, dims=dimensions, attrs=cube1.attrs) + merge = xr.concat([cube1, cube2], dim='cubes') + merge['cubes'] = ["Cube01", "Cube02"] else: if callable(overlap_resolver): # overlap resolver, for example add - values = overlap_resolver(cube1, cube2, **context) - merge = xr.DataArray(values, coords=cube1.coords, - dims=cube1.dims, attrs=cube1.attrs) # define dimensions like in cube1 + merge = overlap_resolver(cube1, cube2, **context) elif isinstance(overlap_resolver, xr.core.dataarray.DataArray): merge = overlap_resolver else: @@ -375,26 +358,21 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): c1 = cube2 c2 = cube1 check = [] - dims_l = c2.dims for c in c1.dims: check.append(c in c1.dims and c in c2.dims) for c in c2.dims: if not (c in c1.dims): dimension = c - c2 = c2.transpose(dimension, ...) - length = len(c2[dimension]) - if np.array(check).all(): - if callable(overlap_resolver): # overlap resolver, for example add - values = [] - for l in range(length): - values.append(overlap_resolver(c2[l], c1, **context).values) - merge = xr.DataArray(values, coords=c2.coords, - dims=c2.dims, attrs=c2.attrs) # define dimensions like in larger cube - merge = merge.transpose(*dims_l) + if np.array(check).all() and len(c2[dimension]) == 1 and callable(overlap_resolver): + c2 = c2.transpose(dimension, ...) 
+ merge = overlap_resolver(c2[0], c1, **context) elif isinstance(overlap_resolver, xr.core.dataarray.DataArray): merge = overlap_resolver else: raise Exception('OverlapResolverMissing') + for a in cube1.attrs: + if a in cube2.attrs and (cube1.attrs[a] == cube2.attrs[a]): + merge.attrs[a] = cube1.attrs[a] return merge @@ -469,10 +447,12 @@ def exec_xar(data, parameters, function, dimension): apply_f = (lambda x, y, p: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], p)[0]) in_dims = [[dimension], [dimension], ['params']] add_arg = [step, data, parameters] + output_size = len(parameters['params']) else: apply_f = (lambda x, y: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], parameters)[0]) in_dims = [[dimension], [dimension]] add_arg = [step, data] + output_size = len(parameters) values = xr.apply_ufunc( apply_f, *add_arg, vectorize=True, @@ -480,9 +460,10 @@ def exec_xar(data, parameters, function, dimension): output_core_dims=[['params']], dask="parallelized", output_dtypes=[np.float32], - dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': parameters}} + dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}} ) values['params'] = list(range(len(values['params']))) + values.attrs = data.attrs return values ############################################################################### @@ -572,13 +553,14 @@ def exec_xar(data, parameters, function, dimension, labels = None): ) if test is None: values = values.transpose(*data.dims) - predicted = xr.DataArray(values, coords=data.coords, dims=data.dims, attrs=data.attrs, name=data.name) - predicted = predicted.where(data==0, data) + values[dimension] = data[dimension] + predicted = data.where(data != 0, values) else: predicted = values.transpose(*data.dims) predicted[dimension] = coords if dimension in ['t', 'times']: predicted = predicted.rename({dimension: 'time'}) + predicted.attrs = data.attrs return predicted @@ -625,39 +607,38 @@ def 
exec_xar(data, output_filepath='out', format='GTiff', options={}, write_prod data format (default: GTiff) """ - def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetime = None) -> xr.Dataset: + def reformat_dataset(dataset: xr.DataArray, timestamp: datetime = None, has_time_dim: bool = False) \ + -> xr.Dataset: """Create a xarray Dataset.""" - coords = {dim: getattr(data_without_time, dim) for dim in data_without_time.dims if dim != 'bands'} - tmp = xr.Dataset(coords=coords) - if 'bands' in data_without_time.coords: + data_var = dataset.fillna(-9999) + data_var.attrs["nodata"] = -9999 + if 'bands' in dataset.coords: try: - for var in data_without_time['bands'].values: - data_var = data_without_time.loc[dict(bands=var)]\ - .where(data_without_time.loc[dict(bands=var)] != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp[str(var)] = (data_var.dims, data_var) + tmp = data_var.to_dataset(dim="bands") except Exception as e: print(e) - data_var = data_without_time.where(data_without_time != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp[str((data_without_time['bands'].values))] = (data_var.dims, data_var) + n = dataset['bands'].values + tmp = data_var.to_dataset(name=str(n)) else: - data_var = data_without_time.where(data_without_time != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp['result'] = (data_var.dims, data_var) + tmp = data_var.to_dataset(name='result') # fix dimension order current_dims = tuple(tmp.dims) - additional_dim = list(set(current_dims).difference({"bands", "y", "x"})) - if additional_dim and current_dims != (additional_dim[0], "y", "x"): - tmp = tmp.transpose(additional_dim[0], "y", "x") - elif current_dims != ("y", "x"): - tmp = tmp.transpose("y", "x") - - tmp.attrs = data_without_time.attrs + base_dims = ("bands", "time", "y", "x") if has_time_dim else ("bands", "y", "x") + additional_dim = list(set(current_dims).difference(set(base_dims))) + base_dims = base_dims[1:] # remove bands elem > not a 
dimension! + if additional_dim and current_dims != (additional_dim[0], *base_dims): + tmp = tmp.transpose(additional_dim[0], *base_dims) + elif current_dims != base_dims: + tmp = tmp.transpose(*base_dims) + + tmp.attrs = data_var.attrs # This is a hack! ODC always(!) expectes to have a time dimension # set datetime to now if no other information is available - tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) + if has_time_dim: + tmp = tmp.rename({"time": "t"}) + else: + tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) if "crs" not in tmp.attrs: first_data_var = tmp.data_vars[list(tmp.data_vars.keys())[0]] tmp.attrs["crs"] = first_data_var.geobox.crs.to_wkt() @@ -670,9 +651,9 @@ def refactor_data(data: xr.DataArray) -> List[xr.Dataset]: if 'time' in data.coords: for timestamp in data.time.values: data_at_timestamp = data.loc[dict(time=timestamp)] - all_tmp.append(extract_single_timestamp(data_at_timestamp, timestamp)) + all_tmp.append(reformat_dataset(data_at_timestamp, timestamp)) else: - all_tmp.append(extract_single_timestamp(data)) + all_tmp.append(reformat_dataset(data)) return all_tmp @@ -693,9 +674,16 @@ def create_output_filepath(output_filepath: str, idx: int = 0, ext: str = "nc") formats = ('GTiff', 'netCDF') if format.lower() == 'netcdf': data_list = refactor_data(data) - for idx, dataset in enumerate(data_list): - cur_output_filepath = create_output_filepath(output_filepath, idx, 'nc') - dataset.to_netcdf(path=cur_output_filepath) + paths = [] + for idx in range(len(data_list)): + paths.append(create_output_filepath(output_filepath, idx, 'nc')) + # combined dataset + if 'time' in data.coords: + combined_dataset = reformat_dataset(data, None, has_time_dim=True) + data_list.append(combined_dataset) + combined_path = f'{splitext(output_filepath)[0]}_combined.nc' + paths.append(combined_path) + xr.save_mfdataset(data_list, paths) elif format.lower() in ['gtiff','geotiff']: data_list = 
refactor_data(data) @@ -838,28 +826,18 @@ def exec_xar(data, target, dimension = None, valid_within = None): t = [] for i in index: t.append(target[dimension].values[int(i)]) - new_data = xr.DataArray(data.values, coords=data.coords, dims=data.dims, attrs=data.attrs, name=data.name) - new_data[dimension] = t filter_values = data[dimension].values + new_data = data #ATTENTION new_data is a shallow copy of data! When you change new_data also data is changed. + new_data[dimension] = t else: - index = np.array([]) + index = [] for d in target[dimension].values: difference = (np.abs(d - data[dimension].values)) nearest = np.argwhere(difference == np.min(difference)) - index = np.append(index, nearest) - v = [] - c = [] + index.append(int(nearest)) data_t = data.transpose(dimension, ...) - for i in index: - v.append(data_t.values[int(i)]) - c.append(data_t[dimension].values[int(i)]) - new_data = xr.DataArray(v, dims=data_t.dims, attrs=data.attrs, name=data.name) + new_data = data_t[index] new_data = new_data.transpose(*data.dims) - for d in new_data.dims: - if d == dimension: - new_data[d] = c - else: - new_data[d] = data[d].values filter_values = new_data[dimension].values new_data[dimension] = target[dimension].values if valid_within is None: @@ -870,6 +848,7 @@ def exec_xar(data, target, dimension = None, valid_within = None): new_data_t = new_data.transpose(dimension, ...) new_data_t = new_data_t[filter] new_data = new_data_t.transpose(*new_data.dims) + new_data.attrs = data.attrs return new_data @@ -1067,3 +1046,131 @@ def exec_xar(data, name): else: raise Exception('DimensionNotAvailable') +############################################################################### +# RenameDimension process +############################################################################### + + +@process +def rename_dimension(): + """ + Rename a dimension. + + Returns + ------- + xr.DataArray : + A data cube with the same dimensions, but the name of one of the dimensions changes. 
+ The old name can not be referred to any longer. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return RenameDimension() + + +class RenameDimension: + """ + Renames a dimension in the data cube while preserving all other properties. + """ + + @staticmethod + def exec_xar(data, source, target): + """ + Parameters + ---------- + data : xr.DataArray + A data cube. + source : str + The current name of the dimension. + Fails with a DimensionNotAvailable exception if the specified dimension does not exist. + target : str + A new name for the dimension. + Fails with a DimensionExists exception if a dimension with the specified name exists. + + Returns + ------- + xr.DataArray : + A data cube with the same dimensions, but the name of one of the dimensions changes. + The old name can not be referred to any longer. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged. + """ + if source not in data.dims: + raise Exception('DimensionNotAvailable') + elif target in data.dims: + raise Exception('DimensionExists') + return data.rename({source: target}) + + +############################################################################### +# RenameLabels process +############################################################################### + + +@process +def rename_labels(): + """ + Rename dimension labels. + + Returns + ------- + xr.DataArray : + The data cube with the same dimensions. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that for the given dimension the labels change. + The old labels can not be referred to any longer. The number of labels remains the same. + """ + return RenameLabels() + + +class RenameLabels: + """ + Renames dimension labels in the data cube while preserving all other properties. 
+ """ + + @staticmethod + def exec_xar(data, dimension, target, source = []): + """ + Parameters + ---------- + data : xr.DataArray + A data cube. + dimension : str + The name of the dimension to rename the labels for. + target : array + The new names for the labels. The dimension labels in the data cube are expected to be enumerated if the parameter target is not specified. + If a target dimension label already exists in the data cube, a LabelExists exception is thrown. + source : array + The names of the labels as they are currently in the data cube. + The array defines an unsorted and potentially incomplete list of labels that should be renamed to the names available in the corresponding array elements in the parameter target. + If one of the source dimension labels doesn't exist, the LabelNotAvailable exception is thrown. + By default, the array is empty so that the dimension labels in the data cube are expected to be enumerated. + + Returns + ------- + xr.DataArray : + The data cube with the same dimensions. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that for the given dimension the labels change. + The old labels can not be referred to any longer. The number of labels remains the same. 
+ """ + if source == []: + source = data[dimension].values + if type(source) in [str, int, float]: + source = [source] + if type(target) in [str, int, float]: + target = [target] + if len(source) != len(target): + raise Exception('LabelMismatch') + source = np.array(source) + for s in source: + if s not in data[dimension].values: + raise Exception('LabelNotAvailable') + target = np.array(target) + for t in target: + if t in data[dimension].values: + raise Exception('LabelExists') + names = np.array([]) + for n in data[dimension].values: + if n in source: + index = np.argwhere(source == n) + names = np.append(names, target[int(index)]) + else: + names = np.append(names, n) + data[dimension] = names + return data diff --git a/src/openeo_processes/logic.py b/src/openeo_processes/logic.py index f7352792..c4d81b0b 100644 --- a/src/openeo_processes/logic.py +++ b/src/openeo_processes/logic.py @@ -478,8 +478,8 @@ def exec_xar(value, accept, reject=np.nan): xr.DataArray : Either the `accept` or `reject` argument depending on the given boolean value. """ - p = value.where(value == 1, reject) - p = p.where(value == 0, accept) + p = value.where(value == 0, accept) + p = p.where(value == 1, reject) return p @staticmethod diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index 545900e7..b26790ee 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -10,7 +10,7 @@ except ImportError: xar_addons = None -from openeo_processes.utils import process +from openeo_processes.utils import process, keep_attrs from openeo_processes.comparison import is_empty from openeo_processes.errors import QuantilesParameterConflict @@ -604,7 +604,10 @@ def exec_xar(x, base): xr.DataArray : The computed logarithm. 
""" - return xr.ufuncs.log(x)/xr.ufuncs.log(base) + l = xr.ufuncs.log(x)/xr.ufuncs.log(base) + if isinstance(x, xr.DataArray): + l.attrs = x.attrs + return l @staticmethod def exec_da(): @@ -1852,7 +1855,8 @@ def exec_xar(y, x): The computed angles in radians. """ - return xr.ufuncs.arctan2(y, x) + arct = xr.ufuncs.arctan2(y, x) + return keep_attrs(x, y, arct) @staticmethod def exec_da(): @@ -1979,7 +1983,9 @@ def exec_xar(x, inputMin, inputMax, outputMin=0., outputMax=1.): xr.DataArray : The transformed numbers. """ - return ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin + lsr = ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin + lsr.attrs = x.attrs + return lsr @staticmethod def exec_da(): @@ -2072,7 +2078,9 @@ def exec_xar(x, factor=1.): The scaled numbers. """ - return x*factor + s = x*factor + s.attrs = x.attrs + return s @staticmethod def exec_da(): @@ -2164,7 +2172,10 @@ def exec_xar(x, y): xr.DataArray : The remainders after division. """ - return x % y if x is not None and y is not None else None # xr.ufuncs.fmod(x, y) + if x is None or y is None: + return None + m = x % y + return keep_attrs(x, y, m) @staticmethod def exec_da(): @@ -2536,8 +2547,10 @@ def exec_xar(base, p): xr.DataArray : The computed values for `base` raised to the power of `p`. 
""" - - return base**float(p) + pow = base**float(p) + if isinstance(base, xr.DataArray): + pow.attrs = base.attrs + return pow @staticmethod @@ -2628,8 +2641,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.mean(dim=dimension, skipna=~ignore_nodata) + m = data.mean(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2724,8 +2739,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if not dimension: dimension = data.dims[0] - - return data.min(dim=dimension, skipna=~ignore_nodata) + m = data.min(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2814,8 +2831,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if not dimension: dimension = data.dims[0] - - return data.max(dim=dimension, skipna=~ignore_nodata) + m = data.max(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2916,8 +2935,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.median(dim=dimension, skipna=~ignore_nodata) + m = data.median(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -3021,8 +3042,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.std(dim=dimension, skipna=~ignore_nodata) + s = data.std(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + s.attrs = data.attrs + return s @staticmethod def exec_da(): @@ -3115,8 +3138,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if is_empty(data): return np.nan - - return data.var(dim=dimension, skipna=~ignore_nodata) + v = data.var(dim=dimension, 
skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + v.attrs = data.attrs + return v @staticmethod def exec_da(): @@ -3206,11 +3231,12 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if is_empty(data): return xr.DataArray(np.nan) else: - minimum = data.min(dim=dimension, skipna=~ignore_nodata).values - maximum = data.max(dim=dimension, skipna=~ignore_nodata).values - extrema = np.append(minimum, maximum) - - return xr.DataArray(extrema) + minimum = data.min(dim=dimension, skipna=~ignore_nodata) + maximum = data.max(dim=dimension, skipna=~ignore_nodata) + extrema = xr.concat([minimum, maximum], dim='extrema') + extrema['extrema'] = ['min', 'max'] + extrema.attrs = data.attrs + return extrema @staticmethod @@ -3469,8 +3495,10 @@ def exec_xar(data, probabilities=None, q=None, ignore_nodata=True, dimension=0): if is_empty(data): return [np.nan] * len(probabilities) - - return data.quantile(np.array(probabilities), dim=dimension, skipna=~ignore_nodata) + q = data.quantile(np.array(probabilities), dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + q.attrs = data.attrs + return q @staticmethod def exec_da(): @@ -4030,6 +4058,12 @@ def exec_xar(data, ignore_nodata=True, dimension=None): summand += item # Concatenate along dim 'new_dim' data = xr.concat(data_tmp, dim='new_dim') + elif isinstance(data, xr.DataArray): + if not dimension: + dimension = data.dims[0] + s = data.sum(dim=dimension, skipna=~ignore_nodata) + s.attrs = data.attrs + return s if is_empty(data): return np.nan @@ -4165,8 +4199,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None, extra_values=None): if not dimension: dimension = data.dims[0] - - return data.prod(dim=dimension, skipna=ignore_nodata) * multiplicand + p = data.prod(dim=dimension, skipna=ignore_nodata) * multiplicand + if isinstance(data, xr.DataArray): + p.attrs = data.attrs + return p @staticmethod def exec_da(): @@ -4260,7 +4296,8 @@ def exec_xar(x, y): The computed sum. 
""" - return x + y + added = x + y + return keep_attrs(x, y, added) @staticmethod def exec_da(): @@ -4357,7 +4394,8 @@ def exec_xar(x, y): The computed result. """ - return x - y + sub = x - y + return keep_attrs(x, y, sub) @staticmethod def exec_da(): @@ -4453,7 +4491,8 @@ def exec_xar(x, y): The computed product. """ - return x * y + mult = x * y + return keep_attrs(x, y, mult) @staticmethod def exec_da(): @@ -4549,7 +4588,8 @@ def exec_xar(x, y): The computed result. """ - return x / y + div = x / y + return keep_attrs(x, y, div) @staticmethod def exec_da(): @@ -4656,7 +4696,8 @@ def exec_xar(x, y): xr.DataArray : The computed normalized difference. """ - return (x - y) / (x + y) + nd = (x - y) / (x + y) + return keep_attrs(x, y, nd) @staticmethod def exec_da(): diff --git a/src/openeo_processes/utils.py b/src/openeo_processes/utils.py index 26c0f256..a88ef40d 100644 --- a/src/openeo_processes/utils.py +++ b/src/openeo_processes/utils.py @@ -290,6 +290,23 @@ def get_time_dimension_from_data(data: xr.DataArray, dim: str = "time") -> str: if time_dim in data.dims: return time_dim +def keep_attrs(x, y, data): + """Keeps the attributes of the inputs x and y in the output data. + + When a processes, which requires two inputs x and y is used, + the attributes of x and y are not forwarded to the output data. + This checks if one of the inputs is a Dataarray, which has attributes. + The attributes of x or y are then given to the output data. 
+ """ + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs and (x.attrs[a] == y.attrs[a]): + data.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + data.attrs = x.attrs + elif isinstance(y, xr.DataArray): + data.attrs = y.attrs + return data if __name__ == '__main__': pass diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 2631442a..3b3e4467 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -76,6 +76,7 @@ def test_save_result(self): out_filename = "out.nc" out_filename_0 = "out_00000.nc" out_filename_1 = "out_00001.nc" + out_filename_combined = "out_combined.nc" oeop.save_result(self.test_data.xr_odc_data_3d, out_filename, format='netCDF') assert os.path.exists(out_filename_0) @@ -92,9 +93,11 @@ def test_save_result(self): oeop.save_result(self.test_data.xr_odc_data_4d, format='netCDF') assert os.path.exists(out_filename_0) assert os.path.exists(out_filename_1) + assert os.path.exists(out_filename_combined) assert os.path.exists(out_product) os.remove(out_filename_0) os.remove(out_filename_1) + os.remove(out_filename_combined) os.remove(out_product) def test_save_result_from_file(self): @@ -114,6 +117,7 @@ def test_save_result_from_file(self): assert actual_ds_0.result.dims == ("y", "x") for i in range(10): os.remove(f"out_{str(i).zfill(5)}.nc") + os.remove("out_combined.nc") os.remove("product.yml") def test_fit_curve(self): @@ -223,5 +227,15 @@ def test_drop_dimension(self): data = oeop.add_dimension(self.test_data.xr_data_factor(), 'cubes', 'Cube01') xr.testing.assert_equal(oeop.drop_dimension(data, 'cubes'), self.test_data.xr_data_factor()) + def test_rename_dimension(self): + """Tests 'rename_dimension' function. """ + data = oeop.rename_dimension(self.test_data.xr_data_factor(), 'x', 'longitude') + assert (data.dims == ('time', 'y', 'longitude')) + + def test_rename_labels(self): + """Tests 'rename_labels' function. 
""" + data = oeop.rename_labels(self.test_data.xr_data_factor(), 'x', [119, 120, 121], [118.9, 119.9, 120.9]) + assert (data['x'].values == (119, 120, 121)).all() + if __name__ == "__main__": unittest.main() diff --git a/tests/test_math.py b/tests/test_math.py index 374a6af1..ee9ee528 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -302,9 +302,7 @@ def test_extrema(self): assert np.isclose(oeop.extrema([1, 0, 3, np.nan, 2], ignore_nodata=False), [np.nan, np.nan], equal_nan=True).all() assert np.isclose(oeop.extrema([]), [np.nan, np.nan], equal_nan=True).all() - xr.testing.assert_equal( - oeop.extrema(self.test_data.xr_data_factor(3, 5)), - xr.DataArray(np.append(3,5))) + assert (oeop.extrema(self.test_data.xr_data_factor(3, 5)).values == np.array([3, 5])).all() def test_clip(self): """ Tests `clip` function. """ @@ -444,6 +442,7 @@ def test_add(self): xr.testing.assert_equal( oeop.add(self.test_data.xr_data_factor(1, 9), self.test_data.xr_data_factor(np.nan, -2)), self.test_data.xr_data_factor(np.nan, 7)) + assert self.test_data.xr_data_factor(1, 9).attrs == oeop.add(7, self.test_data.xr_data_factor(1, 9)).attrs def test_subtract(self): """ Tests `subtract` function. """