From e3e87f370fa01e403eb6a60aa227f3cac2fc3b85 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 8 Sep 2021 09:01:26 +0200 Subject: [PATCH 01/19] Changed odc_load_helper to improve CPU and MEM USAGE --- src/openeo_processes/cubes.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 17e1436d..d129399b 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -19,19 +19,9 @@ def odc_load_helper(odc_cube, params: Dict[str, Any]) -> xr.DataArray: """Helper method to load a xarray DataArray from ODC.""" datacube = odc_cube.load(**params) - # Set no-data values to nan - new_data_vars = {} + # Improve CPU and MEM USAGE for name, data_var in datacube.data_vars.items(): - no_data = data_var.attrs["nodata"] - new_attrs = data_var.attrs - new_attrs["nodata"] = np.nan - new_data_vars[name] = xr.DataArray( - data=data_var.where(data_var != no_data), - coords=data_var.coords, - dims=data_var.dims, - attrs=new_attrs, - ) - datacube = xr.Dataset(data_vars=new_data_vars, coords=datacube.coords, attrs=datacube.attrs) + datacube[name] = datacube[name].where(datacube[name] != datacube[name].nodata) # Convert to xr.DataArray # TODO: add conversion with multiple and custom dimensions @@ -480,7 +470,7 @@ def exec_xar(data, parameters, function, dimension): output_core_dims=[['params']], dask="parallelized", output_dtypes=[np.float32], - dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': parameters}} + dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': len(parameters)}} ) values['params'] = list(range(len(values['params']))) return values From d87ca7507239376772a8f4a1b7185f39e2ec2444 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 8 Sep 2021 09:25:58 +0200 Subject: [PATCH 02/19] Change in fit_curve output_size --- src/openeo_processes/cubes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index d129399b..7c2eccd1 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -459,10 +459,12 @@ def exec_xar(data, parameters, function, dimension): apply_f = (lambda x, y, p: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], p)[0]) in_dims = [[dimension], [dimension], ['params']] add_arg = [step, data, parameters] + output_size = len(parameters['params']) else: apply_f = (lambda x, y: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], parameters)[0]) in_dims = [[dimension], [dimension]] add_arg = [step, data] + output_size = len(parameters) values = xr.apply_ufunc( apply_f, *add_arg, vectorize=True, @@ -470,7 +472,7 @@ def exec_xar(data, parameters, function, dimension): output_core_dims=[['params']], dask="parallelized", output_dtypes=[np.float32], - dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': len(parameters)}} + dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}} ) values['params'] = list(range(len(values['params']))) return values From f9ed43e7ed8171d35fdb09ed945f9b27a49a47f8 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 15 Sep 2021 10:19:17 +0200 Subject: [PATCH 03/19] Updated save_result, updated processes that load values, no values loaded anymore --- src/openeo_processes/arrays.py | 4 -- src/openeo_processes/cubes.py | 72 ++++++++++------------------------ src/openeo_processes/logic.py | 4 +- src/openeo_processes/math.py | 9 +++-- src/openeo_processes/utils.py | 2 +- tests/test_math.py | 4 +- 6 files changed, 29 insertions(+), 66 deletions(-) diff --git a/src/openeo_processes/arrays.py b/src/openeo_processes/arrays.py index fab7dffa..4c76b52a 100644 --- a/src/openeo_processes/arrays.py +++ b/src/openeo_processes/arrays.py @@ -923,8 +923,6 @@ def exec_xar(data, ignore_nodata=True, dimension=None): while (data_first.fillna(999) != data_first).any() and i < len(data_dim): data_first = data_first.fillna(data_dim[i]) i += 1 - else: - pass return data_first @staticmethod @@ -1030,8 +1028,6 @@ def exec_xar(data, ignore_nodata=True, dimension=None): while (data_last.fillna(999) != data_last).any() and i >= 0: data_last = data_last.fillna(data_dim[i]) i -= 1 - else: - pass return data_last @staticmethod diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 7c2eccd1..287848e5 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -22,6 +22,7 @@ def odc_load_helper(odc_cube, params: Dict[str, Any]) -> xr.DataArray: # Improve CPU and MEM USAGE for name, data_var in datacube.data_vars.items(): datacube[name] = datacube[name].where(datacube[name] != datacube[name].nodata) + datacube.attrs['nodata'] = np.nan # Convert to xr.DataArray # TODO: add conversion with multiple and custom dimensions @@ -303,19 +304,11 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): merge = merge.sortby(dimension) elif matching == len(cube1.coords): # all dimensions match if overlap_resolver is None: # no overlap resolver, so a new dimension is added - values = np.array([cube1.values, cube2.values]) - cubes = ["Cube01", "Cube02"] - coords = [cubes] - dimensions = ["cubes"] - for d in cube1.dims: - dimensions.append(d) - coords.append(cube1[d]) - merge = xr.DataArray(values, coords=coords, dims=dimensions, attrs=cube1.attrs) + merge = xr.concat([cube1, cube2], dim='cubes') + merge['cubes'] = ["Cube01", "Cube02"] else: if callable(overlap_resolver): # overlap resolver, for example add - values = overlap_resolver(cube1, cube2, **context) - merge = xr.DataArray(values, coords=cube1.coords, - dims=cube1.dims, attrs=cube1.attrs) # define dimensions like in cube1 + merge = overlap_resolver(cube1, cube2, **context) elif isinstance(overlap_resolver, xr.core.dataarray.DataArray): merge = overlap_resolver else: @@ -365,22 +358,14 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): c1 = cube2 c2 = cube1 check = [] - dims_l = c2.dims for c in c1.dims: check.append(c in c1.dims and c in c2.dims) for c in c2.dims: if not (c in c1.dims): dimension = c - c2 = c2.transpose(dimension, ...) - length = len(c2[dimension]) - if np.array(check).all(): - if callable(overlap_resolver): # overlap resolver, for example add - values = [] - for l in range(length): - values.append(overlap_resolver(c2[l], c1, **context).values) - merge = xr.DataArray(values, coords=c2.coords, - dims=c2.dims, attrs=c2.attrs) # define dimensions like in larger cube - merge = merge.transpose(*dims_l) + if np.array(check).all() and len(c2[dimension]) == 1 and callable(overlap_resolver): + c2 = c2.transpose(dimension, ...) + merge = overlap_resolver(c2[0], c1, **context) elif isinstance(overlap_resolver, xr.core.dataarray.DataArray): merge = overlap_resolver else: @@ -564,8 +549,8 @@ def exec_xar(data, parameters, function, dimension, labels = None): ) if test is None: values = values.transpose(*data.dims) - predicted = xr.DataArray(values, coords=data.coords, dims=data.dims, attrs=data.attrs, name=data.name) - predicted = predicted.where(data==0, data) + values[dimension] = data[dimension] + predicted = data.where(data != 0, values) else: predicted = values.transpose(*data.dims) predicted[dimension] = coords @@ -619,24 +604,17 @@ def exec_xar(data, output_filepath='out', format='GTiff', options={}, write_prod """ def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetime = None) -> xr.Dataset: """Create a xarray Dataset.""" - coords = {dim: getattr(data_without_time, dim) for dim in data_without_time.dims if dim != 'bands'} - tmp = xr.Dataset(coords=coords) + data_var = data_without_time.fillna(-9999) + data_var.attrs["nodata"] = -9999 if 'bands' in data_without_time.coords: try: - for var in data_without_time['bands'].values: - data_var = data_without_time.loc[dict(bands=var)]\ - .where(data_without_time.loc[dict(bands=var)] != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp[str(var)] = (data_var.dims, data_var) + tmp = data_var.to_dataset(dim="bands") except Exception as e: print(e) - data_var = data_without_time.where(data_without_time != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp[str((data_without_time['bands'].values))] = (data_var.dims, data_var) + n = data_without_time['bands'].values + tmp = data_var.to_dataset(name=str(n)) else: - data_var = data_without_time.where(data_without_time != np.nan, -9999) - data_var.attrs["nodata"] = -9999 - tmp['result'] = (data_var.dims, data_var) + tmp = data_var.to_dataset(name='result') # fix dimension order current_dims = tuple(tmp.dims) @@ -830,28 +808,18 @@ def exec_xar(data, target, dimension = None, valid_within = None): t = [] for i in index: t.append(target[dimension].values[int(i)]) - new_data = xr.DataArray(data.values, coords=data.coords, dims=data.dims, attrs=data.attrs, name=data.name) - new_data[dimension] = t filter_values = data[dimension].values + new_data = data + new_data[dimension] = t else: - index = np.array([]) + index = [] for d in target[dimension].values: difference = (np.abs(d - data[dimension].values)) nearest = np.argwhere(difference == np.min(difference)) - index = np.append(index, nearest) - v = [] - c = [] + index.append(int(nearest)) data_t = data.transpose(dimension, ...) - for i in index: - v.append(data_t.values[int(i)]) - c.append(data_t[dimension].values[int(i)]) - new_data = xr.DataArray(v, dims=data_t.dims, attrs=data.attrs, name=data.name) + new_data = data_t[index] new_data = new_data.transpose(*data.dims) - for d in new_data.dims: - if d == dimension: - new_data[d] = c - else: - new_data[d] = data[d].values filter_values = new_data[dimension].values new_data[dimension] = target[dimension].values if valid_within is None: diff --git a/src/openeo_processes/logic.py b/src/openeo_processes/logic.py index f7352792..c4d81b0b 100644 --- a/src/openeo_processes/logic.py +++ b/src/openeo_processes/logic.py @@ -478,8 +478,8 @@ def exec_xar(value, accept, reject=np.nan): xr.DataArray : Either the `accept` or `reject` argument depending on the given boolean value. """ - p = value.where(value == 1, reject) - p = p.where(value == 0, accept) + p = value.where(value == 0, accept) + p = p.where(value == 1, reject) return p @staticmethod diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index 545900e7..eac9981f 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -3206,11 +3206,12 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if is_empty(data): return xr.DataArray(np.nan) else: - minimum = data.min(dim=dimension, skipna=~ignore_nodata).values - maximum = data.max(dim=dimension, skipna=~ignore_nodata).values - extrema = np.append(minimum, maximum) + minimum = data.min(dim=dimension, skipna=~ignore_nodata) + maximum = data.max(dim=dimension, skipna=~ignore_nodata) + extrema = xr.concat([minimum, maximum], dim='extrema') + extrema['extrema'] = ['min', 'max'] - return xr.DataArray(extrema) + return extrema @staticmethod diff --git a/src/openeo_processes/utils.py b/src/openeo_processes/utils.py index 26c0f256..f4327b44 100644 --- a/src/openeo_processes/utils.py +++ b/src/openeo_processes/utils.py @@ -104,7 +104,7 @@ def fun_wrapper(*args, **kwargs): elif "numpy" in datatypes: cls_fun = getattr(cls, "exec_np") elif "dask" in datatypes: - cls_fun = getattr(cls, "exec_dar") + cls_fun = getattr(cls, "exec_xar") elif datatypes.issubset({"int", "float", "NoneType", "str", "bool", "datetime"}): cls_fun = getattr(cls, "exec_num") elif "tuple" in datatypes: diff --git a/tests/test_math.py b/tests/test_math.py index 374a6af1..d76bc1fc 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -302,9 +302,7 @@ def test_extrema(self): assert np.isclose(oeop.extrema([1, 0, 3, np.nan, 2], ignore_nodata=False), [np.nan, np.nan], equal_nan=True).all() assert np.isclose(oeop.extrema([]), [np.nan, np.nan], equal_nan=True).all() - xr.testing.assert_equal( - oeop.extrema(self.test_data.xr_data_factor(3, 5)), - xr.DataArray(np.append(3,5))) + assert (oeop.extrema(self.test_data.xr_data_factor(3, 5)).values == np.array([3, 5])).all() def test_clip(self): """ Tests `clip` function. """ From 1f8f9c1a1cc480c44f67dc975bb63d5d85dc86d4 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Fri, 24 Sep 2021 09:39:23 +0200 Subject: [PATCH 04/19] Added attributes --- src/openeo_processes/comparison.py | 29 ++++++++++++---- src/openeo_processes/cubes.py | 6 ++++ src/openeo_processes/math.py | 56 +++++++++++++++++++++++++----- tests/test_math.py | 1 + 4 files changed, 77 insertions(+), 15 deletions(-) diff --git a/src/openeo_processes/comparison.py b/src/openeo_processes/comparison.py index 7744bf83..3af701f0 100644 --- a/src/openeo_processes/comparison.py +++ b/src/openeo_processes/comparison.py @@ -430,7 +430,14 @@ def exec_xar(x, y, delta=False, case_sensitive=True, reduce=False): # TODO: add ar_eq = x_time == y_time # comparison of dates else: ar_eq = x == y - + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + ar_eq.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + ar_eq.attrs = x.attrs + elif isinstance(y, xr.DataArray): + ar_eq.attrs = y.attrs if reduce: return ar_eq.all() else: @@ -704,14 +711,22 @@ def exec_xar(x, y, reduce=False): ## x has to be a datacube, whereas y can be another datacube, an integer or a float if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): gt_ar = x > y - if reduce: - return gt_ar.all() - else: - return gt_ar + for a in x.attrs: + if a in y.attrs: + gt_ar.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + gt_ar = x > y + gt_ar.attrs = x.attrs + elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): + gt_ar = x > y + gt_ar.attrs = y.attrs + if reduce: + return gt_ar.all() else: - return False + return gt_ar + return False @staticmethod def exec_da(): diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 287848e5..56d833a5 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -370,6 +370,9 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): merge = overlap_resolver else: raise Exception('OverlapResolverMissing') + for a in cube1.attrs: + if a in cube2.attrs: + merge.attrs[a] = cube1.attrs[a] return merge @@ -460,6 +463,7 @@ def exec_xar(data, parameters, function, dimension): dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}} ) values['params'] = list(range(len(values['params']))) + values.attrs = data.attrs return values ############################################################################### @@ -556,6 +560,7 @@ def exec_xar(data, parameters, function, dimension, labels = None): predicted[dimension] = coords if dimension in ['t', 'times']: predicted = predicted.rename({dimension: 'time'}) + predicted.attrs = data.attrs return predicted @@ -830,6 +835,7 @@ def exec_xar(data, target, dimension = None, valid_within = None): new_data_t = new_data.transpose(dimension, ...) new_data_t = new_data_t[filter] new_data = new_data_t.transpose(*new_data.dims) + new_data.attrs = data.attrs return new_data diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index eac9981f..e97bde5c 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -2536,8 +2536,10 @@ def exec_xar(base, p): xr.DataArray : The computed values for `base` raised to the power of `p`. """ - - return base**float(p) + pow = base**float(p) + if isinstance(base, xr.DataArray): + pow.attrs = base.attrs + return pow @staticmethod @@ -3021,8 +3023,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.std(dim=dimension, skipna=~ignore_nodata) + s = data.std(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + s.attrs = data.attrs + return s @staticmethod def exec_da(): @@ -4261,7 +4265,16 @@ def exec_xar(x, y): The computed sum. """ - return x + y + added = x + y + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + added.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + added.attrs = x.attrs + elif isinstance(y, xr.DataArray): + added.attrs = y.attrs + return added @staticmethod def exec_da(): @@ -4358,7 +4371,16 @@ def exec_xar(x, y): The computed result. """ - return x - y + sub = x - y + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + sub.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + sub.attrs = x.attrs + elif isinstance(y, xr.DataArray): + sub.attrs = y.attrs + return sub @staticmethod def exec_da(): @@ -4454,7 +4476,16 @@ def exec_xar(x, y): The computed product. """ - return x * y + mult = x * y + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + mult.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + mult.attrs = x.attrs + elif isinstance(y, xr.DataArray): + mult.attrs = y.attrs + return mult @staticmethod def exec_da(): @@ -4550,7 +4581,16 @@ def exec_xar(x, y): The computed result. """ - return x / y + div = x / y + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + div.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + div.attrs = x.attrs + elif isinstance(y, xr.DataArray): + div.attrs = y.attrs + return div @staticmethod def exec_da(): diff --git a/tests/test_math.py b/tests/test_math.py index d76bc1fc..ee9ee528 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -442,6 +442,7 @@ def test_add(self): xr.testing.assert_equal( oeop.add(self.test_data.xr_data_factor(1, 9), self.test_data.xr_data_factor(np.nan, -2)), self.test_data.xr_data_factor(np.nan, 7)) + assert self.test_data.xr_data_factor(1, 9).attrs == oeop.add(7, self.test_data.xr_data_factor(1, 9)).attrs def test_subtract(self): """ Tests `subtract` function. """ From 618173bbe69fa66b16ec908671512c6cc87d138a Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Fri, 24 Sep 2021 10:37:46 +0200 Subject: [PATCH 05/19] Update comparison processes to check datatype of input --- src/openeo_processes/comparison.py | 72 ++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/src/openeo_processes/comparison.py b/src/openeo_processes/comparison.py index 3af701f0..4c91fa94 100644 --- a/src/openeo_processes/comparison.py +++ b/src/openeo_processes/comparison.py @@ -707,8 +707,7 @@ def exec_xar(x, y, reduce=False): xr.DataArray: : Returns True if `x` is strictly greater than `y`, None if any operand is None, otherwise False. """ - - ## x has to be a datacube, whereas y can be another datacube, an integer or a float + if x is None or y is None: return None if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): @@ -858,14 +857,22 @@ def exec_xar(x, y, reduce = False): """ if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): gte_ar = ((x-y) >= 0) - if reduce: - return gte_ar.all() - else: - return gte_ar + for a in x.attrs: + if a in y.attrs: + gte_ar.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + gte_ar = ((x-y) >= 0) + gte_ar.attrs = x.attrs + elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): + gte_ar = ((x-y) >= 0) + gte_ar.attrs = y.attrs + if reduce: + return gte_ar.all() else: - return False + return gte_ar + return False @staticmethod def exec_da(): @@ -995,17 +1002,24 @@ def exec_xar(x, y, reduce = False): Returns True if `x` is strictly lower than `y`, None if any operand is None, otherwise False. """ - ## x has to be a datacube, whereas y can be another datacube, an integer or a float if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): lt_ar = x < y - if reduce: - return lt_ar.all() - else: - return lt_ar + for a in x.attrs: + if a in y.attrs: + lt_ar.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + lt_ar = x < y + lt_ar.attrs = x.attrs + elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): + lt_ar = x < y + lt_ar.attrs = y.attrs + if reduce: + return lt_ar.all() else: - return False + return lt_ar + return False @staticmethod def exec_da(): @@ -1135,17 +1149,24 @@ def exec_xar(x, y, reduce = False): Returns True if `x` is strictly lower than or equal to `y`, None if any operand is None, otherwise False. """ - ## x has to be a datacube, whereas y can be another datacube, an integer or a float if x is None or y is None: return None - elif isinstance(y, xr.DataArray) or isinstance(y, int) or isinstance(y, float): + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): lte_ar = x <= y - if reduce: - return lte_ar.all() - else: - return lte_ar + for a in x.attrs: + if a in y.attrs: + lte_ar.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + lte_ar = x <= y + lte_ar.attrs = x.attrs + elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): + lte_ar = x <= y + lte_ar.attrs = y.attrs + if reduce: + return lte_ar.all() else: - return False + return lte_ar + return False @staticmethod def exec_da(): @@ -1301,9 +1322,12 @@ def exec_xar(x, min, max, exclude_max=False, reduce=False): return False if exclude_max: - return xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lt.exec_xar(x, max, reduce=reduce)) + bet = xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lt.exec_xar(x, max, reduce=reduce)) else: - return xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lte.exec_xar(x, max, reduce=reduce)) + bet = xr.ufuncs.logical_and(Gte.exec_xar(x, min, reduce=reduce) , Lte.exec_xar(x, max, reduce=reduce)) + if isinstance(x, xr.DataArray): + bet.attrs = x.attrs + return bet @staticmethod def exec_da(): From c84442e1054244311a4084039f8bb9132d42cc2f Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Fri, 24 Sep 2021 16:16:49 +0200 Subject: [PATCH 06/19] Changed attributes in math processes --- src/openeo_processes/math.py | 96 ++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index e97bde5c..df7e892c 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -604,7 +604,10 @@ def exec_xar(x, base): xr.DataArray : The computed logarithm. """ - return xr.ufuncs.log(x)/xr.ufuncs.log(base) + l = xr.ufuncs.log(x)/xr.ufuncs.log(base) + if isinstance(x, xr.DataArray): + l.attrs = x.attrs + return l @staticmethod def exec_da(): @@ -1852,7 +1855,16 @@ def exec_xar(y, x): The computed angles in radians. """ - return xr.ufuncs.arctan2(y, x) + arct = xr.ufuncs.arctan2(y, x) + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + arct.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + arct.attrs = x.attrs + elif isinstance(y, xr.DataArray): + arct.attrs = y.attrs + return arct @staticmethod def exec_da(): @@ -1979,7 +1991,9 @@ def exec_xar(x, inputMin, inputMax, outputMin=0., outputMax=1.): xr.DataArray : The transformed numbers. """ - return ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin + lsr = ((x - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) + outputMin + lsr.attrs = x.attrs + return lsr @staticmethod def exec_da(): @@ -2072,7 +2086,9 @@ def exec_xar(x, factor=1.): The scaled numbers. """ - return x*factor + s = x*factor + s.attrs = x.attrs + return s @staticmethod def exec_da(): @@ -2164,7 +2180,16 @@ def exec_xar(x, y): xr.DataArray : The remainders after division. """ - return x % y if x is not None and y is not None else None # xr.ufuncs.fmod(x, y) + m = x % y + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + m.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + m.attrs = x.attrs + elif isinstance(y, xr.DataArray): + m.attrs = y.attrs + return m @staticmethod def exec_da(): @@ -2630,8 +2655,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.mean(dim=dimension, skipna=~ignore_nodata) + m = data.mean(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2726,8 +2753,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if not dimension: dimension = data.dims[0] - - return data.min(dim=dimension, skipna=~ignore_nodata) + m = data.min(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2816,8 +2845,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if not dimension: dimension = data.dims[0] - - return data.max(dim=dimension, skipna=~ignore_nodata) + m = data.max(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -2918,8 +2949,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): """ if is_empty(data): return np.nan - - return data.median(dim=dimension, skipna=~ignore_nodata) + m = data.median(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + m.attrs = data.attrs + return m @staticmethod def exec_da(): @@ -3119,8 +3152,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None): if is_empty(data): return np.nan - - return data.var(dim=dimension, skipna=~ignore_nodata) + v = data.var(dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + v.attrs = data.attrs + return v @staticmethod def exec_da(): @@ -3214,7 +3249,7 @@ def exec_xar(data, ignore_nodata=True, dimension=None): maximum = data.max(dim=dimension, skipna=~ignore_nodata) extrema = xr.concat([minimum, maximum], dim='extrema') extrema['extrema'] = ['min', 'max'] - + extrema.attrs = data.attrs return extrema @@ -3474,8 +3509,10 @@ def exec_xar(data, probabilities=None, q=None, ignore_nodata=True, dimension=0): if is_empty(data): return [np.nan] * len(probabilities) - - return data.quantile(np.array(probabilities), dim=dimension, skipna=~ignore_nodata) + q = data.quantile(np.array(probabilities), dim=dimension, skipna=~ignore_nodata) + if isinstance(data, xr.DataArray): + q.attrs = data.attrs + return q @staticmethod def exec_da(): @@ -4035,6 +4072,12 @@ def exec_xar(data, ignore_nodata=True, dimension=None): summand += item # Concatenate along dim 'new_dim' data = xr.concat(data_tmp, dim='new_dim') + elif isinstance(data, xr.DataArray): + if not dimension: + dimension = data.dims[0] + s = data.sum(dim=dimension, skipna=~ignore_nodata) + s.attrs = data.attrs + return s if is_empty(data): return np.nan @@ -4170,8 +4213,10 @@ def exec_xar(data, ignore_nodata=True, dimension=None, extra_values=None): if not dimension: dimension = data.dims[0] - - return data.prod(dim=dimension, skipna=ignore_nodata) * multiplicand + p = data.prod(dim=dimension, skipna=ignore_nodata) * multiplicand + if isinstance(data, xr.DataArray): + p.attrs = data.attrs + return p @staticmethod def exec_da(): @@ -4697,7 +4742,16 @@ def exec_xar(x, y): xr.DataArray : The computed normalized difference. """ - return (x - y) / (x + y) + nd = (x - y) / (x + y) + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs: + nd.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + nd.attrs = x.attrs + elif isinstance(y, xr.DataArray): + nd.attrs = y.attrs + return nd @staticmethod def exec_da(): From f75148f5debd8e4a2583a60dd237783df2cbec4e Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 27 Sep 2021 13:49:13 +0200 Subject: [PATCH 07/19] Added rename_dimension process, needed for save_result process --- src/openeo_processes/cubes.py | 129 ++++++++++++++++++++++++++++++++++ tests/test_cubes.py | 10 +++ 2 files changed, 139 insertions(+) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 56d833a5..e6921a08 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -620,6 +620,7 @@ def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetim tmp = data_var.to_dataset(name=str(n)) else: tmp = data_var.to_dataset(name='result') + tmp.attrs = data_var.attrs # fix dimension order current_dims = tuple(tmp.dims) @@ -1033,3 +1034,131 @@ def exec_xar(data, name): else: raise Exception('DimensionNotAvailable') +############################################################################### +# RenameDimension process +############################################################################### + + +@process +def rename_dimension(): + """ + Rename a dimension. + + Returns + ------- + xr.DataArray : + A data cube with the same dimensions, but the name of one of the dimensions changes. + The old name can not be referred to any longer. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged. + """ + return RenameDimension() + + +class RenameDimension: + """ + Renames a dimension in the data cube while preserving all other properties. + """ + + @staticmethod + def exec_xar(data, source, target): + """ + Parameters + ---------- + data : xr.DataArray + A data cube. + source : str + The current name of the dimension. + Fails with a DimensionNotAvailable exception if the specified dimension does not exist. + target : str + A new Name for the dimension. + Fails with a DimensionExists exception if a dimension with the specified name exists. + + Returns + ------- + xr.DataArray : + A data cube with the same dimensions, but the name of one of the dimensions changes. + The old name can not be referred to any longer. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged. + """ + if source not in data.dims: + raise Exception('DimensionNotAvailable') + elif target in data.dims: + raise Exception('DimensionExists') + return data.rename({source: target}) + + +############################################################################### +# RenameLabels process +############################################################################### + + +@process +def rename_labels(): + """ + Rename dimension labels. + + Returns + ------- + xr.DataArray : + The data cube with the same dimensions. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that for the given dimension the labels change. + The old labels can not be referred to any longer. The number of labels remains the same. + """ + return RenameLabels() + + +class RenameLabels: + """ + Renames a dimension in the data cube while preserving all other properties. + """ + + @staticmethod + def exec_xar(data, dimension, target, source = []): + """ + Parameters + ---------- + data : xr.DataArray + A data cube. + dimension : str + The name of the dimension to rename the labels for. + target : array + The new names for the labels. The dimension labels in the data cube are expected to be enumerated if the parameter target is not specified. + If a target dimension label already exists in the data cube, a LabelExists exception is thrown. + source : array + The names of the labels as they are currently in the data cube. + The array defines an unsorted and potentially incomplete list of labels that should be renamed to the names available in the corresponding array elements in the parameter target. + If one of the source dimension labels doesn't exist, the LabelNotAvailable exception is thrown. + By default, the array is empty so that the dimension labels in the data cube are expected to be enumerated. + + Returns + ------- + xr.DataArray : + The data cube with the same dimensions. + The dimension properties (name, type, labels, reference system and resolution) remain unchanged, except that for the given dimension the labels change. + The old labels can not be referred to any longer. The number of labels remains the same. + """ + if source == []: + source = data[dimension].values + if type(source) in [str, int, float]: + source = [source] + if type(target) in [str, int, float]: + target = [target] + if len(source) != len(target): + raise Exception('LabelMismatch') + source = np.array(source) + for s in source: + if s not in data[dimension].values: + raise Exception('LabelNotAvailable') + target = np.array(target) + for t in target: + if t in data[dimension].values: + raise Exception('LabelExists') + names = np.array([]) + for n in data[dimension].values: + if n in source: + index = np.argwhere(source == n) + names = np.append(names, target[int(index)]) + else: + names = np.append(names, n) + data[dimension] = names + return data diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 2631442a..85222213 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -223,5 +223,15 @@ def test_drop_dimension(self): data = oeop.add_dimension(self.test_data.xr_data_factor(), 'cubes', 'Cube01') xr.testing.assert_equal(oeop.drop_dimension(data, 'cubes'), self.test_data.xr_data_factor()) + def test_rename_dimension(self): + """Tests 'rename_dimension' function. """ + data = oeop.rename_dimension(self.test_data.xr_data_factor(), 'x', 'longitude') + assert (data.dims == ('time', 'y', 'longitude')) + + def test_rename_labels(self): + """Tests 'rename_labels' function. """ + data = oeop.rename_labels(self.test_data.xr_data_factor(), 'x', [119, 120, 121], [118.9, 119.9, 120.9]) + assert (data['x'].values == (119, 120, 121)).all() + if __name__ == "__main__": unittest.main() From f1829782a2cc1961cd0f6d2b4b9ae9be83d3b10f Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Tue, 28 Sep 2021 12:00:55 +0200 Subject: [PATCH 08/19] Inserted keep_attrs function --- src/openeo_processes/comparison.py | 112 ++++++++++------------------- src/openeo_processes/cubes.py | 7 +- src/openeo_processes/math.py | 74 +++---------------- src/openeo_processes/utils.py | 19 ++++- 4 files changed, 68 insertions(+), 144 deletions(-) diff --git a/src/openeo_processes/comparison.py b/src/openeo_processes/comparison.py index 4c91fa94..a07e62d4 100644 --- a/src/openeo_processes/comparison.py +++ b/src/openeo_processes/comparison.py @@ -5,6 +5,7 @@ import xarray as xr from openeo_processes.utils import process from openeo_processes.utils import str2time +from openeo_processes.utils import keep_attrs # TODO: test if this works for different data types @@ -383,9 +384,9 @@ def exec_xar(x, y, delta=False, case_sensitive=True, reduce=False): # TODO: add Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. delta : float, optional Only applicable for comparing two arrays containing numbers. If this optional parameter is set to a @@ -430,14 +431,7 @@ def exec_xar(x, y, delta=False, case_sensitive=True, reduce=False): # TODO: add ar_eq = x_time == y_time # comparison of dates else: ar_eq = x == y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - ar_eq.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - ar_eq.attrs = x.attrs - elif isinstance(y, xr.DataArray): - ar_eq.attrs = y.attrs + ar_eq = keep_attrs(x, y, ar_eq) if reduce: return ar_eq.all() else: @@ -553,9 +547,9 @@ def exec_xar(x, y, delta=None, case_sensitive=True, reduce=False): # TODO: add Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. delta : float, optional Only applicable for comparing two arrays containing numbers. If this optional parameter is set to a @@ -694,7 +688,7 @@ def exec_xar(x, y, reduce=False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. y : xr.DataArray, integer, float Second operand. @@ -710,21 +704,13 @@ def exec_xar(x, y, reduce=False): if x is None or y is None: return None - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - gt_ar = x > y - for a in x.attrs: - if a in y.attrs: - gt_ar.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: gt_ar = x > y - gt_ar.attrs = x.attrs - elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): - gt_ar = x > y - gt_ar.attrs = y.attrs - if reduce: - return gt_ar.all() - else: - return gt_ar + gt_ar = keep_attrs(x, y, gt_ar) + if reduce: + return gt_ar.all() + else: + return gt_ar return False @staticmethod @@ -841,9 +827,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. @@ -857,21 +843,13 @@ def exec_xar(x, y, reduce = False): """ if x is None or y is None: return None - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - gte_ar = ((x-y) >= 0) - for a in x.attrs: - if a in y.attrs: - gte_ar.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): - gte_ar = ((x-y) >= 0) - gte_ar.attrs = x.attrs - elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: gte_ar = ((x-y) >= 0) - gte_ar.attrs = y.attrs - if reduce: - return gte_ar.all() - else: - return gte_ar + gte_ar = keep_attrs(x, y, gte_ar) + if reduce: + return gte_ar.all() + else: + return gte_ar return False @staticmethod @@ -988,9 +966,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. @@ -1004,21 +982,13 @@ def exec_xar(x, y, reduce = False): """ if x is None or y is None: return None - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: lt_ar = x < y - for a in x.attrs: - if a in y.attrs: - lt_ar.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): - lt_ar = x < y - lt_ar.attrs = x.attrs - elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): - lt_ar = x < y - lt_ar.attrs = y.attrs - if reduce: - return lt_ar.all() - else: - return lt_ar + lt_ar = keep_attrs(x, y, lt_ar) + if reduce: + return lt_ar.all() + else: + return lt_ar return False @staticmethod @@ -1135,9 +1105,9 @@ def exec_xar(x, y, reduce = False): Parameters ---------- - x : xr.DataArray + x : xr.DataArray, integer, float First operand. - y : xr.DataArray + y : xr.DataArray, integer, float Second operand. reduce : bool, optional If True, one value will be returned. @@ -1151,21 +1121,13 @@ def exec_xar(x, y, reduce = False): """ if x is None or y is None: return None - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - lte_ar = x <= y - for a in x.attrs: - if a in y.attrs: - lte_ar.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray) and isinstance(y, int) or isinstance(y, float): + elif type(x) in [int, float, xr.DataArray] and type(y) in [int, float, xr.DataArray]: lte_ar = x <= y - lte_ar.attrs = x.attrs - elif isinstance(y, xr.DataArray) and isinstance(x, int) or isinstance(x, float): - lte_ar = x <= y - lte_ar.attrs = y.attrs - if reduce: - return lte_ar.all() - else: - return lte_ar + lte_ar = keep_attrs(x, y, lte_ar) + if reduce: + return lte_ar.all() + else: + return lte_ar return False @staticmethod diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index e6921a08..b818add2 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -371,7 +371,7 @@ def exec_xar(cube1, cube2, overlap_resolver = None, context={}): else: raise Exception('OverlapResolverMissing') for a in cube1.attrs: - if a in cube2.attrs: + if a in cube2.attrs and (cube1.attrs[a] == cube2.attrs[a]): merge.attrs[a] = cube1.attrs[a] return merge @@ -620,7 +620,6 @@ def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetim tmp = data_var.to_dataset(name=str(n)) else: tmp = data_var.to_dataset(name='result') - tmp.attrs = data_var.attrs # fix dimension order current_dims = tuple(tmp.dims) @@ -630,7 +629,7 @@ def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetim elif current_dims != ("y", "x"): tmp = tmp.transpose("y", "x") - tmp.attrs = data_without_time.attrs + tmp.attrs = data_var.attrs # This is a hack! ODC always(!) expectes to have a time dimension # set datetime to now if no other information is available tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) @@ -815,7 +814,7 @@ def exec_xar(data, target, dimension = None, valid_within = None): for i in index: t.append(target[dimension].values[int(i)]) filter_values = data[dimension].values - new_data = data + new_data = data #ATTENTION new_data is a shallow copy of data! When you change new_data also data is changed. new_data[dimension] = t else: index = [] diff --git a/src/openeo_processes/math.py b/src/openeo_processes/math.py index df7e892c..b26790ee 100644 --- a/src/openeo_processes/math.py +++ b/src/openeo_processes/math.py @@ -10,7 +10,7 @@ except ImportError: xar_addons = None -from openeo_processes.utils import process +from openeo_processes.utils import process, keep_attrs from openeo_processes.comparison import is_empty from openeo_processes.errors import QuantilesParameterConflict @@ -1856,15 +1856,7 @@ def exec_xar(y, x): """ arct = xr.ufuncs.arctan2(y, x) - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - arct.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - arct.attrs = x.attrs - elif isinstance(y, xr.DataArray): - arct.attrs = y.attrs - return arct + return keep_attrs(x, y, arct) @staticmethod def exec_da(): @@ -2180,16 +2172,10 @@ def exec_xar(x, y): xr.DataArray : The remainders after division. """ + if x is None or y is None: + return None m = x % y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - m.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - m.attrs = x.attrs - elif isinstance(y, xr.DataArray): - m.attrs = y.attrs - return m + return keep_attrs(x, y, m) @staticmethod def exec_da(): @@ -4311,15 +4297,7 @@ def exec_xar(x, y): """ added = x + y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - added.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - added.attrs = x.attrs - elif isinstance(y, xr.DataArray): - added.attrs = y.attrs - return added + return keep_attrs(x, y, added) @staticmethod def exec_da(): @@ -4417,15 +4395,7 @@ def exec_xar(x, y): """ sub = x - y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - sub.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - sub.attrs = x.attrs - elif isinstance(y, xr.DataArray): - sub.attrs = y.attrs - return sub + return keep_attrs(x, y, sub) @staticmethod def exec_da(): @@ -4522,15 +4492,7 @@ def exec_xar(x, y): """ mult = x * y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - mult.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - mult.attrs = x.attrs - elif isinstance(y, xr.DataArray): - mult.attrs = y.attrs - return mult + return keep_attrs(x, y, mult) @staticmethod def exec_da(): @@ -4627,15 +4589,7 @@ def exec_xar(x, y): """ div = x / y - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - div.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - div.attrs = x.attrs - elif isinstance(y, xr.DataArray): - div.attrs = y.attrs - return div + return keep_attrs(x, y, div) @staticmethod def exec_da(): @@ -4743,15 +4697,7 @@ def exec_xar(x, y): The computed normalized difference. """ nd = (x - y) / (x + y) - if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): - for a in x.attrs: - if a in y.attrs: - nd.attrs[a] = x.attrs[a] - elif isinstance(x, xr.DataArray): - nd.attrs = x.attrs - elif isinstance(y, xr.DataArray): - nd.attrs = y.attrs - return nd + return keep_attrs(x, y, nd) @staticmethod def exec_da(): diff --git a/src/openeo_processes/utils.py b/src/openeo_processes/utils.py index f4327b44..a88ef40d 100644 --- a/src/openeo_processes/utils.py +++ b/src/openeo_processes/utils.py @@ -104,7 +104,7 @@ def fun_wrapper(*args, **kwargs): elif "numpy" in datatypes: cls_fun = getattr(cls, "exec_np") elif "dask" in datatypes: - cls_fun = getattr(cls, "exec_xar") + cls_fun = getattr(cls, "exec_dar") elif datatypes.issubset({"int", "float", "NoneType", "str", "bool", "datetime"}): cls_fun = getattr(cls, "exec_num") elif "tuple" in datatypes: @@ -290,6 +290,23 @@ def get_time_dimension_from_data(data: xr.DataArray, dim: str = "time") -> str: if time_dim in data.dims: return time_dim +def keep_attrs(x, y, data): + """Keeps the attributes of the inputs x and y in the output data. + + When a processes, which requires two inputs x and y is used, + the attributes of x and y are not forwarded to the output data. + This checks if one of the inputs is a Dataarray, which has attributes. + The attributes of x or y are then given to the output data. + """ + if isinstance(x, xr.DataArray) and isinstance(y, xr.DataArray): + for a in x.attrs: + if a in y.attrs and (x.attrs[a] == y.attrs[a]): + data.attrs[a] = x.attrs[a] + elif isinstance(x, xr.DataArray): + data.attrs = x.attrs + elif isinstance(y, xr.DataArray): + data.attrs = y.attrs + return data if __name__ == '__main__': pass From 9bbeab35ad49fa89e01d9b866a08b9d72bf41d29 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Tue, 28 Sep 2021 18:04:24 +0200 Subject: [PATCH 09/19] Save netCDF files with save_mfdataset --- src/openeo_processes/cubes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index b818add2..36a92ff8 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -668,9 +668,10 @@ def create_output_filepath(output_filepath: str, idx: int = 0, ext: str = "nc") formats = ('GTiff', 'netCDF') if format.lower() == 'netcdf': data_list = refactor_data(data) - for idx, dataset in enumerate(data_list): - cur_output_filepath = create_output_filepath(output_filepath, idx, 'nc') - dataset.to_netcdf(path=cur_output_filepath) + paths = [] + for idx in range(len(data_list)): + paths.append(create_output_filepath(output_filepath, idx, 'nc')) + xr.save_mfdataset(data_list, paths) elif format.lower() in ['gtiff','geotiff']: data_list = refactor_data(data) From 4c61600a31ebd1932c219ec9338d28d980d2f7e3 Mon Sep 17 00:00:00 2001 From: sherrmann Date: Thu, 30 Sep 2021 11:42:22 +0200 Subject: [PATCH 10/19] Create netcdf file with multiple timestamps. As discussed https://github.com/openEOPlatform/architecture-docs/issues/27 Signed-off-by: sherrmann --- src/openeo_processes/cubes.py | 36 +++++++++++++++++++++++------------ tests/test_cubes.py | 4 ++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 36a92ff8..ae327711 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -607,32 +607,38 @@ def exec_xar(data, output_filepath='out', format='GTiff', options={}, write_prod data format (default: GTiff) """ - def extract_single_timestamp(data_without_time: xr.DataArray, timestamp: datetime = None) -> xr.Dataset: + def reformat_dataset(dataset: xr.DataArray, timestamp: datetime = None, has_time_dim: bool = False) \ + -> xr.Dataset: """Create a xarray Dataset.""" - data_var = data_without_time.fillna(-9999) + data_var = dataset.fillna(-9999) data_var.attrs["nodata"] = -9999 - if 'bands' in data_without_time.coords: + if 'bands' in dataset.coords: try: tmp = data_var.to_dataset(dim="bands") except Exception as e: print(e) - n = data_without_time['bands'].values + n = dataset['bands'].values tmp = data_var.to_dataset(name=str(n)) else: tmp = data_var.to_dataset(name='result') # fix dimension order current_dims = tuple(tmp.dims) - additional_dim = list(set(current_dims).difference({"bands", "y", "x"})) - if additional_dim and current_dims != (additional_dim[0], "y", "x"): - tmp = tmp.transpose(additional_dim[0], "y", "x") - elif current_dims != ("y", "x"): - tmp = tmp.transpose("y", "x") + base_dims = ("bands", "time", "y", "x") if has_time_dim else ("bands", "y", "x") + additional_dim = list(set(current_dims).difference(set(base_dims))) + base_dims = base_dims[1:] # remove bands elem > not a dimension! + if additional_dim and current_dims != (additional_dim[0], *base_dims): + tmp = tmp.transpose(additional_dim[0], *base_dims) + elif current_dims != base_dims: + tmp = tmp.transpose(*base_dims) tmp.attrs = data_var.attrs # This is a hack! ODC always(!) expectes to have a time dimension # set datetime to now if no other information is available - tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) + if has_time_dim: + tmp = tmp.rename_dims({"time": "t"}) + else: + tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) if "crs" not in tmp.attrs: first_data_var = tmp.data_vars[list(tmp.data_vars.keys())[0]] tmp.attrs["crs"] = first_data_var.geobox.crs.to_wkt() @@ -645,9 +651,9 @@ def refactor_data(data: xr.DataArray) -> List[xr.Dataset]: if 'time' in data.coords: for timestamp in data.time.values: data_at_timestamp = data.loc[dict(time=timestamp)] - all_tmp.append(extract_single_timestamp(data_at_timestamp, timestamp)) + all_tmp.append(reformat_dataset(data_at_timestamp, timestamp)) else: - all_tmp.append(extract_single_timestamp(data)) + all_tmp.append(reformat_dataset(data)) return all_tmp @@ -671,6 +677,12 @@ def create_output_filepath(output_filepath: str, idx: int = 0, ext: str = "nc") paths = [] for idx in range(len(data_list)): paths.append(create_output_filepath(output_filepath, idx, 'nc')) + # combined dataset + if 'time' in data.coords: + combined_dataset = reformat_dataset(data, None, has_time_dim=True) + data_list.append(combined_dataset) + combined_path = f'{splitext(output_filepath)[0]}_combined.nc' + paths.append(combined_path) xr.save_mfdataset(data_list, paths) elif format.lower() in ['gtiff','geotiff']: diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 85222213..3b3e4467 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -76,6 +76,7 @@ def test_save_result(self): out_filename = "out.nc" out_filename_0 = "out_00000.nc" out_filename_1 = "out_00001.nc" + out_filename_combined = "out_combined.nc" oeop.save_result(self.test_data.xr_odc_data_3d, out_filename, format='netCDF') assert os.path.exists(out_filename_0) @@ -92,9 +93,11 @@ def test_save_result(self): oeop.save_result(self.test_data.xr_odc_data_4d, format='netCDF') assert os.path.exists(out_filename_0) assert os.path.exists(out_filename_1) + assert os.path.exists(out_filename_combined) assert os.path.exists(out_product) os.remove(out_filename_0) os.remove(out_filename_1) + os.remove(out_filename_combined) os.remove(out_product) def test_save_result_from_file(self): @@ -114,6 +117,7 @@ def test_save_result_from_file(self): assert actual_ds_0.result.dims == ("y", "x") for i in range(10): os.remove(f"out_{str(i).zfill(5)}.nc") + os.remove("out_combined.nc") os.remove("product.yml") def test_fit_curve(self): From ccbd2cbc1ab43fbbabb390643bdc1d2574d2af35 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 10:02:16 +0200 Subject: [PATCH 11/19] Use xr.curvefit for fit curve function --- src/openeo_processes/cubes.py | 24 +++--------------------- tests/test_cubes.py | 7 ++++--- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index ae327711..19dbda13 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -433,7 +433,6 @@ def exec_xar(data, parameters, function, dimension): xr.DataArray A data cube with the optimal values for the parameters. """ - data = data.fillna(0) # zero values (masked) are not considered if dimension in ['time', 't', 'times']: # time dimension must be converted into values dimension = get_time_dimension_from_data(data, dimension) dates = data[dimension].values @@ -443,26 +442,9 @@ def exec_xar(data, parameters, function, dimension): else: step = dimension - if isinstance(parameters, xr.core.dataarray.DataArray): - apply_f = (lambda x, y, p: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], p)[0]) - in_dims = [[dimension], [dimension], ['params']] - add_arg = [step, data, parameters] - output_size = len(parameters['params']) - else: - apply_f = (lambda x, y: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], parameters)[0]) - in_dims = [[dimension], [dimension]] - add_arg = [step, data] - output_size = len(parameters) - values = xr.apply_ufunc( - apply_f, *add_arg, - vectorize=True, - input_core_dims=in_dims, - output_core_dims=[['params']], - dask="parallelized", - output_dtypes=[np.float32], - dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}} - ) - values['params'] = list(range(len(values['params']))) + values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) + values = values.curvefit_coefficients + values = values.rename({"param": "params"}) values.attrs = data.attrs return values diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 3b3e4467..073b7654 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -153,9 +153,10 @@ def func_oeop(x, *parameters): params = (oeop.fit_curve(xdata, parameters=[1, 1, 1], function=func_oeop, dimension='time')) assert (np.isclose(params, [0, 1, 0.5], atol=0.3)).all() # output should be close to 0, 1, 0.5 - params_2 = (oeop.fit_curve(xdata, parameters=params, function=func_oeop, dimension='time')) - assert (np.isclose(params_2, [0, 1, 0.5], atol=0.3)).all() - assert (np.isclose(params, params_2, atol=0.01)).all() + # TODO: parameters input only allows list of parameters, not xr.DataArray of parameters right now + #params_2 = (oeop.fit_curve(xdata, parameters=params, function=func_oeop, dimension='time')) + #assert (np.isclose(params_2, [0, 1, 0.5], atol=0.3)).all() + #assert (np.isclose(params, params_2, atol=0.01)).all() def test_predict_curve(self): """Tests 'predict_curve' function. """ From 49de4ab3124ee717bbec376c741bf51492b2ef3a Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 11:15:17 +0200 Subject: [PATCH 12/19] rechunk data in fit curve --- src/openeo_processes/cubes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 19dbda13..5c851b1f 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -439,6 +439,8 @@ def exec_xar(data, parameters, function, dimension): timestep = [((x - np.datetime64('1970-01-01')) / np.timedelta64(1, 's')) for x in dates] step = np.array(timestep) data[dimension] = step + if 'x' in data.dims and 'y' in data.dims: + data = data.chunk({'lat':100, 'lon':100, dimension:len(dates)}) else: step = dimension From 252394db283f70733634aad9f30923413860b6c9 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 11:34:22 +0200 Subject: [PATCH 13/19] rechunk time --- src/openeo_processes/cubes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 5c851b1f..48584197 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -440,7 +440,12 @@ def exec_xar(data, parameters, function, dimension): step = np.array(timestep) data[dimension] = step if 'x' in data.dims and 'y' in data.dims: + data = data.chunk({'x':100, 'y':100, dimension:len(dates)}) + elif 'lat' in data.dims and 'lon' in data.dims: data = data.chunk({'lat':100, 'lon':100, dimension:len(dates)}) + else: + data = data.chunk({dimension:len(dates)}) + else: step = dimension From e661815b5ae5c7cc3aa2912637d05763c7f40b11 Mon Sep 17 00:00:00 2001 From: sherrmann Date: Mon, 4 Oct 2021 11:40:49 +0200 Subject: [PATCH 14/19] No only rename dimension, but whole coordinate. Signed-off-by: sherrmann --- src/openeo_processes/cubes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 48584197..6ff249d0 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -625,7 +625,7 @@ def reformat_dataset(dataset: xr.DataArray, timestamp: datetime = None, has_time # This is a hack! ODC always(!) expectes to have a time dimension # set datetime to now if no other information is available if has_time_dim: - tmp = tmp.rename_dims({"time": "t"}) + tmp = tmp.rename({"time": "t"}) else: tmp.attrs["datetime_from_dim"] = str(timestamp) if timestamp else str(datetime.now()) if "crs" not in tmp.attrs: From f658a7d70252909f0b405570f44ba50d86088561 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 12:27:31 +0200 Subject: [PATCH 15/19] rechunk data time, without rechunking x,y --- src/openeo_processes/cubes.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 6ff249d0..44300c09 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -439,16 +439,10 @@ def exec_xar(data, parameters, function, dimension): timestep = [((x - np.datetime64('1970-01-01')) / np.timedelta64(1, 's')) for x in dates] step = np.array(timestep) data[dimension] = step - if 'x' in data.dims and 'y' in data.dims: - data = data.chunk({'x':100, 'y':100, dimension:len(dates)}) - elif 'lat' in data.dims and 'lon' in data.dims: - data = data.chunk({'lat':100, 'lon':100, dimension:len(dates)}) - else: - data = data.chunk({dimension:len(dates)}) else: step = dimension - + data = data.chunk({dimension: len(dates)}) values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) values = values.curvefit_coefficients values = values.rename({"param": "params"}) From 7c5c459ac946bf4aa198ba7a7e4cb93a83fa11ac Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 13:39:57 +0200 Subject: [PATCH 16/19] try splitting in to arrays --- src/openeo_processes/cubes.py | 26 ++++++++++++++++++++++---- tests/test_cubes.py | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 44300c09..6394d26e 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -439,12 +439,30 @@ def exec_xar(data, parameters, function, dimension): timestep = [((x - np.datetime64('1970-01-01')) / np.timedelta64(1, 's')) for x in dates] step = np.array(timestep) data[dimension] = step - + if "x" in data.dims: + data = data.chunk({dimension: (len(dates))}) + x = data['x'].values + l2 = int(np.around(len(x) / 2)) + x1 = x[:l2] + x2 = x[l2:] + split1 = data.sel(x=x1) + split2 = data.sel(x=x2) + values1 = ( + split1.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, + param_names=list(range(len(parameters))), kwargs=None)).curvefit_coefficients + values2 = ( + split2.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, + param_names=list(range(len(parameters))), kwargs=None)).curvefit_coefficients + values = xr.concat([values1, values2], dim="x") + else: + data = data.chunk({dimension: len(dates)}) + values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) + values = values.curvefit_coefficients else: step = dimension - data = data.chunk({dimension: len(dates)}) - values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) - values = values.curvefit_coefficients + data = data.chunk({dimension: len(dates)}) + values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) + values = values.curvefit_coefficients values = values.rename({"param": "params"}) values.attrs = data.attrs return values diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 073b7654..91559ddb 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -127,7 +127,7 @@ def test_fit_curve(self): np.cos(rang) + 0.5 * np.sin(rang) + np.random.rand( 24) * 0.2] # define data with y = 0 + 1 * cos() + 0.5 *sin() xdata = xr.DataArray(rang, coords=[["NY", "LA"], pd.date_range("2000-01-01", periods=24, freq='M')], - dims=["space", "time"]) + dims=["x", "time"]) def func_oeop(x, *parameters): _2sjyaa699_11 = oeop.pi(**{}) From c1600c8ff8e1a78998311e038a445f13821bc739 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 13:52:18 +0200 Subject: [PATCH 17/19] old fit_curve version --- src/openeo_processes/cubes.py | 45 +++++++++++++++++------------------ tests/test_cubes.py | 9 ++++--- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 6394d26e..14cdddf7 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -433,37 +433,36 @@ def exec_xar(data, parameters, function, dimension): xr.DataArray A data cube with the optimal values for the parameters. """ + data = data.fillna(0) # zero values (masked) are not considered if dimension in ['time', 't', 'times']: # time dimension must be converted into values dimension = get_time_dimension_from_data(data, dimension) dates = data[dimension].values timestep = [((x - np.datetime64('1970-01-01')) / np.timedelta64(1, 's')) for x in dates] step = np.array(timestep) data[dimension] = step - if "x" in data.dims: - data = data.chunk({dimension: (len(dates))}) - x = data['x'].values - l2 = int(np.around(len(x) / 2)) - x1 = x[:l2] - x2 = x[l2:] - split1 = data.sel(x=x1) - split2 = data.sel(x=x2) - values1 = ( - split1.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, - param_names=list(range(len(parameters))), kwargs=None)).curvefit_coefficients - values2 = ( - split2.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, - param_names=list(range(len(parameters))), kwargs=None)).curvefit_coefficients - values = xr.concat([values1, values2], dim="x") - else: - data = data.chunk({dimension: len(dates)}) - values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) - values = values.curvefit_coefficients else: step = dimension - data = data.chunk({dimension: len(dates)}) - values = (data.curvefit(dimension, function, reduce_dims=dimension, skipna=True, p0=parameters, bounds=None, param_names=list(range(len(parameters))), kwargs=None)) - values = values.curvefit_coefficients - values = values.rename({"param": "params"}) + + if isinstance(parameters, xr.core.dataarray.DataArray): + apply_f = (lambda x, y, p: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], p)[0]) + in_dims = [[dimension], [dimension], ['params']] + add_arg = [step, data, parameters] + output_size = len(parameters['params']) + else: + apply_f = (lambda x, y: optimize.curve_fit(function, x[np.nonzero(y)], y[np.nonzero(y)], parameters)[0]) + in_dims = [[dimension], [dimension]] + add_arg = [step, data] + output_size = len(parameters) + values = xr.apply_ufunc( + apply_f, *add_arg, + vectorize=True, + input_core_dims=in_dims, + output_core_dims=[['params']], + dask="parallelized", + output_dtypes=[np.float32], + dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}} + ) + values['params'] = list(range(len(values['params']))) values.attrs = data.attrs return values diff --git a/tests/test_cubes.py b/tests/test_cubes.py index 91559ddb..3b3e4467 100644 --- a/tests/test_cubes.py +++ b/tests/test_cubes.py @@ -127,7 +127,7 @@ def test_fit_curve(self): np.cos(rang) + 0.5 * np.sin(rang) + np.random.rand( 24) * 0.2] # define data with y = 0 + 1 * cos() + 0.5 *sin() xdata = xr.DataArray(rang, coords=[["NY", "LA"], pd.date_range("2000-01-01", periods=24, freq='M')], - dims=["x", "time"]) + dims=["space", "time"]) def func_oeop(x, *parameters): _2sjyaa699_11 = oeop.pi(**{}) @@ -153,10 +153,9 @@ def func_oeop(x, *parameters): params = (oeop.fit_curve(xdata, parameters=[1, 1, 1], function=func_oeop, dimension='time')) assert (np.isclose(params, [0, 1, 0.5], atol=0.3)).all() # output should be close to 0, 1, 0.5 - # TODO: parameters input only allows list of parameters, not xr.DataArray of parameters right now - #params_2 = (oeop.fit_curve(xdata, parameters=params, function=func_oeop, dimension='time')) - #assert (np.isclose(params_2, [0, 1, 0.5], atol=0.3)).all() - #assert (np.isclose(params, params_2, atol=0.01)).all() + params_2 = (oeop.fit_curve(xdata, parameters=params, function=func_oeop, dimension='time')) + assert (np.isclose(params_2, [0, 1, 0.5], atol=0.3)).all() + assert (np.isclose(params, params_2, atol=0.01)).all() def test_predict_curve(self): """Tests 'predict_curve' function. """ From 2113956141cb9aabe9d66d583b809fe5524357e0 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Mon, 4 Oct 2021 14:26:31 +0200 Subject: [PATCH 18/19] Split fit curve into two datacubes --- src/openeo_processes/cubes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 14cdddf7..66902984 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -453,6 +453,23 @@ def exec_xar(data, parameters, function, dimension): in_dims = [[dimension], [dimension]] add_arg = [step, data] output_size = len(parameters) + if "x" in data.dims and len(data['x'].values)>10: + x_ = data['x'].values + l2 = int(np.around(len(x_) / 2)) + x1 = x_[:l2] + x2 = x_[l2:] + split1 = data.sel(x=x1) + split2 = data.sel(x=x2) + add_arg1 = [step, split1] + add_arg2 = [step, split2] + values1 = xr.apply_ufunc(apply_f, *add_arg1, vectorize=True, input_core_dims=in_dims,output_core_dims=[['params']], + dask="parallelized", output_dtypes=[np.float32], dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}}) + values2 = xr.apply_ufunc(apply_f, *add_arg2, vectorize=True, input_core_dims=in_dims,output_core_dims=[['params']], + dask="parallelized", output_dtypes=[np.float32], dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}}) + values = xr.concat([values1, values2], dim="x") + values['params'] = list(range(len(values['params']))) + values.attrs = data.attrs + return values values = xr.apply_ufunc( apply_f, *add_arg, vectorize=True, From 09675ca2c69ceaa9af5f28a0fa8b55b92b5dcb80 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Tue, 5 Oct 2021 12:06:08 +0200 Subject: [PATCH 19/19] old fit_curve version --- src/openeo_processes/cubes.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/openeo_processes/cubes.py b/src/openeo_processes/cubes.py index 66902984..14cdddf7 100644 --- a/src/openeo_processes/cubes.py +++ b/src/openeo_processes/cubes.py @@ -453,23 +453,6 @@ def exec_xar(data, parameters, function, dimension): in_dims = [[dimension], [dimension]] add_arg = [step, data] output_size = len(parameters) - if "x" in data.dims and len(data['x'].values)>10: - x_ = data['x'].values - l2 = int(np.around(len(x_) / 2)) - x1 = x_[:l2] - x2 = x_[l2:] - split1 = data.sel(x=x1) - split2 = data.sel(x=x2) - add_arg1 = [step, split1] - add_arg2 = [step, split2] - values1 = xr.apply_ufunc(apply_f, *add_arg1, vectorize=True, input_core_dims=in_dims,output_core_dims=[['params']], - dask="parallelized", output_dtypes=[np.float32], dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}}) - values2 = xr.apply_ufunc(apply_f, *add_arg2, vectorize=True, input_core_dims=in_dims,output_core_dims=[['params']], - dask="parallelized", output_dtypes=[np.float32], dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {'params': output_size}}) - values = xr.concat([values1, values2], dim="x") - values['params'] = list(range(len(values['params']))) - values.attrs = data.attrs - return values values = xr.apply_ufunc( apply_f, *add_arg, vectorize=True,