From fe5e7ad2d4e3eb14b022ecf10baae6d3cc2de07e Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 14 Mar 2023 20:34:45 +0530 Subject: [PATCH 01/41] Add feature to convert dataarray to dask dataframe. This is for the issue #7409 --- doc/api.rst | 1 + doc/whats-new.rst | 1 + xarray/core/dataarray.py | 68 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 0d56fc73997..4d26a8a6482 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -631,6 +631,7 @@ DataArray methods DataArray.from_iris DataArray.from_series DataArray.to_cdms2 + DataArray.to_dask_dataframe DataArray.to_dataframe DataArray.to_dataset DataArray.to_dict diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2482fe077ff..0e670480062 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,7 @@ New Features By `Michael Niklas `_. - Support dask arrays in ``first`` and ``last`` reductions. By `Deepak Cherian `_. +- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1f04f506397..68670a6a404 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -96,6 +96,11 @@ T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) + try: + from dask.dataframe import DataFrame as DaskDataFrame + except ImportError: + DaskDataFrame = None + def _infer_coords_and_dims( shape, coords, dims @@ -6670,6 +6675,69 @@ def resample( **indexer_kwargs, ) + def to_dask_dataframe( + self, dim_order: Sequence[Hashable] = None, *, name: Hashable = None + ) -> DaskDataFrame: + """Convert this array into a dask.dataframe.DataFrame. + + Parameters + ---------- + + dim_order: Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order , so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence on + which operations are efficient on the resulting dask dataframe. + + name: Hashable or None, optional + Name given to this array(required if unnamed). + It is a keyword-only argument. A keyword-only argument can only be passed to the + function using its name as a keyword argument , and not as a positional argument. + + Returns + ------- + dask.dataframe.DataFrame + + Examples + -------- + + da=xr.DataArray(np.random.rand(4,3,2), + dims=('time','lat','lon'), + coords={'time':np.arange(4), + 'lat':[-30,-20,-10], + 'lon':[120,130]}, + name='temperature', + attrs={'units':'Celsius', + 'description':'Random temperature data'}) + + da.to_dask_dataframe(['lat','lon','time'],name="temp_dataframe") + + Dask DataFrame Structure: + lat lon time temp_dataframe + npartitions=1 + 0 int64 int64 int64 float64 + 23 ... ... ... ... + Dask Name: concat-indexed,30 tasks + + """ + + if name is None: + name = self.name + + if name is None: + raise ValueError( + "Cannot convert an unnamed DataArray to a " + "dask dataframe : use the ``name`` parameter" + ) + + if self.ndim == 0: + raise ValueError("Cannot convert a scalar to a dataframe") + + tmp_dataset = Dataset({name: self}) + dask_dataframe = tmp_dataset.to_dask_dataframe(dim_order) + return dask_dataframe + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = utils.UncachedAccessor(StringAccessor["DataArray"]) From 4174d50bb762cae2caadbe49d3c1478db1276f7d Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Thu, 16 Mar 2023 16:22:04 +0530 Subject: [PATCH 02/41] Add test for new method dataarray.to_dask_dataframe() --- xarray/tests/test_dataarray.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ed1abea5fbe..9637053a92b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3205,6 +3205,39 @@ def test_to_dataframe_0length(self) -> None: assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) + def test_to_dask_dataframe(self) -> None: + arr_np = np.random.randn(3, 4) + arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo") + expected = arr.to_series() + actual = arr.to_dask_dataframe()["foo"] + + assert_array_equal(actual.name, expected.name) + assert_array_equal(actual.values, expected.values) + + actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"] + assert_array_equal(arr_np.transpose().reshape(-1), actual.values) + + # regression test for coords with different dimensions + + arr.coords["C"] = ("B", [-1, -2, -3]) + expected = arr.to_series().to_frame() + expected["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4 + expected = expected[["C", "foo"]] + actual = arr.to_dask_dataframe()[["C", "foo"]] + + assert_array_equal(expected.values, actual.values) + assert_array_equal(expected.columns.values, actual.columns.values) + + with pytest.raises(ValueError, match="does not match the set of dimensions"): + arr.to_dask_dataframe(dim_order=["B", "A", "C"]) + + with pytest.raises(ValueError, match=r"cannot convert a scalar"): + arr.sel(A="c", B=2).to_dataframe() + + arr.name = None + with pytest.raises(ValueError, match=r"unnamed"): + arr.to_dataframe() + def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array arr = DataArray([1, 2, 3], dims="x", name="x") From ba47c16755503b9497e1a059347b92483cc8e514 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sat, 18 Mar 2023 01:10:33 +0530 Subject: [PATCH 03/41] Changes after review --- xarray/core/dataarray.py | 84 +++++++++++++++++----------------- xarray/tests/test_dataarray.py | 3 +- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 68670a6a404..11f1a4a01db 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -57,6 +57,10 @@ from numpy.typing import ArrayLike + try: + from dask.dataframe import DataFrame as DaskDataFrame + except ImportError: + DaskDataFrame = None try: from dask.delayed import Delayed except ImportError: @@ -96,11 +100,6 @@ T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) - try: - from dask.dataframe import DataFrame as DaskDataFrame - except ImportError: - DaskDataFrame = None - def _infer_coords_and_dims( shape, coords, dims @@ -6676,24 +6675,33 @@ def resample( ) def to_dask_dataframe( - self, dim_order: Sequence[Hashable] = None, *, name: Hashable = None + self, + dim_order: Sequence[Hashable] | None = None, + set_index: bool = False, + *, + name: Hashable | None = None, ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. Parameters ---------- + dim_order : Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order , so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence + on which operations are efficient on the resulting dask dataframe. - dim_order: Sequence of Hashable or None , optional - Hierarchical dimension order for the resulting dataframe. - Array content is transposed to this order and then written out as flat - vectors in contiguous order , so the last dimension in this list - will be contiguous in the resulting DataFrame. This has a major influence on - which operations are efficient on the resulting dask dataframe. + name : Hashable or None, optional + Name given to this array(required if unnamed). + It is a keyword-only argument. A keyword-only argument can only be passed + to the function using its name as a keyword argument , and not as a + positional argument. - name: Hashable or None, optional - Name given to this array(required if unnamed). - It is a keyword-only argument. A keyword-only argument can only be passed to the - function using its name as a keyword argument , and not as a positional argument. + set_index : bool, default: False + If set_index=True, the dask DataFrame is indexed by this dataset's + coordinate. Since dask DataFrames do not support multi-indexes, + set_index only works if the dataset only contains one dimension. Returns ------- @@ -6701,42 +6709,36 @@ def to_dask_dataframe( Examples -------- - - da=xr.DataArray(np.random.rand(4,3,2), - dims=('time','lat','lon'), - coords={'time':np.arange(4), - 'lat':[-30,-20,-10], - 'lon':[120,130]}, - name='temperature', - attrs={'units':'Celsius', - 'description':'Random temperature data'}) - - da.to_dask_dataframe(['lat','lon','time'],name="temp_dataframe") - + >>> da = xr.DataArray( + ... np.arange(4 * 3 * 2).reshape(4, 3, 2), + ... dims=("time", "lat", "lon"), + ... coords={ + ... "time": np.arange(4), + ... "lat": [-30, -20, -10], + ... "lon": [120, 130], + ... }, + ... name="temperature", + ... attrs={"units": "Celsius", "description": "Random temperature data"}, + ... ) + >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time temp_dataframe npartitions=1 - 0 int64 int64 int64 float64 - 23 ... ... ... ... - Dask Name: concat-indexed,30 tasks + 0 int64 int64 int64 int64 + 23 ... ... ... ... + Dask Name: concat-indexed, 30 tasks """ - if name is None: name = self.name if name is None: raise ValueError( "Cannot convert an unnamed DataArray to a " - "dask dataframe : use the ``name`` parameter" + "dask dataframe : use the ``name`` parameter ." ) - - if self.ndim == 0: - raise ValueError("Cannot convert a scalar to a dataframe") - - tmp_dataset = Dataset({name: self}) - dask_dataframe = tmp_dataset.to_dask_dataframe(dim_order) - return dask_dataframe + ds = self._to_dataset_whole(name) + return ds.to_dask_dataframe(dim_order, set_index) # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9637053a92b..246a86242aa 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3205,8 +3205,9 @@ def test_to_dataframe_0length(self) -> None: assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) + @requires_dask def test_to_dask_dataframe(self) -> None: - arr_np = np.random.randn(3, 4) + arr_np = np.arange(3 * 4).reshape(3, 4) arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo") expected = arr.to_series() actual = arr.to_dask_dataframe()["foo"] From ed559d3b8d8e52a2c37402d559a7bed0081873dc Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sun, 19 Mar 2023 00:35:06 +0530 Subject: [PATCH 04/41] Corrections in docstring and import --- xarray/core/dataarray.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 11f1a4a01db..0ae23bb609d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -60,7 +60,7 @@ try: from dask.dataframe import DataFrame as DaskDataFrame except ImportError: - DaskDataFrame = None + DaskDataFrame = None # type: ignore try: from dask.delayed import Delayed except ImportError: @@ -6722,12 +6722,11 @@ def to_dask_dataframe( ... ) >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time temp_dataframe npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... - Dask Name: concat-indexed, 30 tasks - + 0 int64 int64 int64 int64 + 23 ... ... ... ... + Dask Name: concat-indexed, 1 graph layer """ if name is None: name = self.name From 65f9fab59a84b7ddaa24d9374342834f7baa6b9e Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sun, 19 Mar 2023 01:48:54 +0530 Subject: [PATCH 05/41] docstring correction --- xarray/core/dataarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0ae23bb609d..0050cd86b96 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6727,6 +6727,7 @@ def to_dask_dataframe( 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer + """ if name is None: name = self.name From bffe3cbcc2e35ee1af81fe6b099645bd5b4b315e Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 21 Mar 2023 22:03:29 +0530 Subject: [PATCH 06/41] Remove name parameter --- xarray/core/dataarray.py | 24 +++++------------------- xarray/tests/test_dataarray.py | 5 ----- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0050cd86b96..68d14a4dc3d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6678,8 +6678,6 @@ def to_dask_dataframe( self, dim_order: Sequence[Hashable] | None = None, set_index: bool = False, - *, - name: Hashable | None = None, ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. @@ -6692,12 +6690,6 @@ def to_dask_dataframe( will be contiguous in the resulting DataFrame. This has a major influence on which operations are efficient on the resulting dask dataframe. - name : Hashable or None, optional - Name given to this array(required if unnamed). - It is a keyword-only argument. A keyword-only argument can only be passed - to the function using its name as a keyword argument , and not as a - positional argument. - set_index : bool, default: False If set_index=True, the dask DataFrame is indexed by this dataset's coordinate. Since dask DataFrames do not support multi-indexes, @@ -6717,27 +6709,21 @@ def to_dask_dataframe( ... "lat": [-30, -20, -10], ... "lon": [120, 130], ... }, - ... name="temperature", + ... name="eg_dataarray", ... attrs={"units": "Celsius", "description": "Random temperature data"}, ... ) - >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") + >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time eg_dataarray npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer """ - if name is None: - name = self.name - if name is None: - raise ValueError( - "Cannot convert an unnamed DataArray to a " - "dask dataframe : use the ``name`` parameter ." - ) - ds = self._to_dataset_whole(name) + name = self.name if self.name is not None else _THIS_ARRAY + ds = self._to_dataset_whole(name, shallow_copy=False) return ds.to_dask_dataframe(dim_order, set_index) # this needs to be at the end, or mypy will confuse with `str` diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 24eac24e9c9..5b4f68b3e22 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3212,7 +3212,6 @@ def test_to_dask_dataframe(self) -> None: expected = arr.to_series() actual = arr.to_dask_dataframe()["foo"] - assert_array_equal(actual.name, expected.name) assert_array_equal(actual.values, expected.values) actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"] @@ -3235,10 +3234,6 @@ def test_to_dask_dataframe(self) -> None: with pytest.raises(ValueError, match=r"cannot convert a scalar"): arr.sel(A="c", B=2).to_dataframe() - arr.name = None - with pytest.raises(ValueError, match=r"unnamed"): - arr.to_dataframe() - def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array arr = DataArray([1, 2, 3], dims="x", name="x") From a7e423c662a48865ac77171efe16649f5f7b7dab Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 14 Mar 2023 20:34:45 +0530 Subject: [PATCH 07/41] Add feature to convert dataarray to dask dataframe. This is for the issue #7409 --- doc/api.rst | 1 + doc/whats-new.rst | 1 + xarray/core/dataarray.py | 68 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 0d56fc73997..4d26a8a6482 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -631,6 +631,7 @@ DataArray methods DataArray.from_iris DataArray.from_series DataArray.to_cdms2 + DataArray.to_dask_dataframe DataArray.to_dataframe DataArray.to_dataset DataArray.to_dict diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6bfdf0b6f0a..ade5fd66dc1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,6 +66,7 @@ New Features By `Deepak Cherian `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. +- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1f04f506397..68670a6a404 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -96,6 +96,11 @@ T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) + try: + from dask.dataframe import DataFrame as DaskDataFrame + except ImportError: + DaskDataFrame = None + def _infer_coords_and_dims( shape, coords, dims @@ -6670,6 +6675,69 @@ def resample( **indexer_kwargs, ) + def to_dask_dataframe( + self, dim_order: Sequence[Hashable] = None, *, name: Hashable = None + ) -> DaskDataFrame: + """Convert this array into a dask.dataframe.DataFrame. + + Parameters + ---------- + + dim_order: Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order , so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence on + which operations are efficient on the resulting dask dataframe. + + name: Hashable or None, optional + Name given to this array(required if unnamed). + It is a keyword-only argument. A keyword-only argument can only be passed to the + function using its name as a keyword argument , and not as a positional argument. + + Returns + ------- + dask.dataframe.DataFrame + + Examples + -------- + + da=xr.DataArray(np.random.rand(4,3,2), + dims=('time','lat','lon'), + coords={'time':np.arange(4), + 'lat':[-30,-20,-10], + 'lon':[120,130]}, + name='temperature', + attrs={'units':'Celsius', + 'description':'Random temperature data'}) + + da.to_dask_dataframe(['lat','lon','time'],name="temp_dataframe") + + Dask DataFrame Structure: + lat lon time temp_dataframe + npartitions=1 + 0 int64 int64 int64 float64 + 23 ... ... ... ... + Dask Name: concat-indexed,30 tasks + + """ + + if name is None: + name = self.name + + if name is None: + raise ValueError( + "Cannot convert an unnamed DataArray to a " + "dask dataframe : use the ``name`` parameter" + ) + + if self.ndim == 0: + raise ValueError("Cannot convert a scalar to a dataframe") + + tmp_dataset = Dataset({name: self}) + dask_dataframe = tmp_dataset.to_dask_dataframe(dim_order) + return dask_dataframe + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = utils.UncachedAccessor(StringAccessor["DataArray"]) From 5c7a48f231899c3fa99862aa754bb82a902bcf9e Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Thu, 16 Mar 2023 16:22:04 +0530 Subject: [PATCH 08/41] Add test for new method dataarray.to_dask_dataframe() --- xarray/tests/test_dataarray.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 378d471ba6b..2a9c36af32a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3205,6 +3205,39 @@ def test_to_dataframe_0length(self) -> None: assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) + def test_to_dask_dataframe(self) -> None: + arr_np = np.random.randn(3, 4) + arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo") + expected = arr.to_series() + actual = arr.to_dask_dataframe()["foo"] + + assert_array_equal(actual.name, expected.name) + assert_array_equal(actual.values, expected.values) + + actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"] + assert_array_equal(arr_np.transpose().reshape(-1), actual.values) + + # regression test for coords with different dimensions + + arr.coords["C"] = ("B", [-1, -2, -3]) + expected = arr.to_series().to_frame() + expected["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4 + expected = expected[["C", "foo"]] + actual = arr.to_dask_dataframe()[["C", "foo"]] + + assert_array_equal(expected.values, actual.values) + assert_array_equal(expected.columns.values, actual.columns.values) + + with pytest.raises(ValueError, match="does not match the set of dimensions"): + arr.to_dask_dataframe(dim_order=["B", "A", "C"]) + + with pytest.raises(ValueError, match=r"cannot convert a scalar"): + arr.sel(A="c", B=2).to_dataframe() + + arr.name = None + with pytest.raises(ValueError, match=r"unnamed"): + arr.to_dataframe() + def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array arr = DataArray([1, 2, 3], dims="x", name="x") From 43a8b9b9d2b340b6e7384a83ed77a24f7b7990c7 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sat, 18 Mar 2023 01:10:33 +0530 Subject: [PATCH 09/41] Changes after review --- xarray/core/dataarray.py | 84 +++++++++++++++++----------------- xarray/tests/test_dataarray.py | 3 +- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 68670a6a404..11f1a4a01db 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -57,6 +57,10 @@ from numpy.typing import ArrayLike + try: + from dask.dataframe import DataFrame as DaskDataFrame + except ImportError: + DaskDataFrame = None try: from dask.delayed import Delayed except ImportError: @@ -96,11 +100,6 @@ T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) - try: - from dask.dataframe import DataFrame as DaskDataFrame - except ImportError: - DaskDataFrame = None - def _infer_coords_and_dims( shape, coords, dims @@ -6676,24 +6675,33 @@ def resample( ) def to_dask_dataframe( - self, dim_order: Sequence[Hashable] = None, *, name: Hashable = None + self, + dim_order: Sequence[Hashable] | None = None, + set_index: bool = False, + *, + name: Hashable | None = None, ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. Parameters ---------- + dim_order : Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order , so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence + on which operations are efficient on the resulting dask dataframe. - dim_order: Sequence of Hashable or None , optional - Hierarchical dimension order for the resulting dataframe. - Array content is transposed to this order and then written out as flat - vectors in contiguous order , so the last dimension in this list - will be contiguous in the resulting DataFrame. This has a major influence on - which operations are efficient on the resulting dask dataframe. + name : Hashable or None, optional + Name given to this array(required if unnamed). + It is a keyword-only argument. A keyword-only argument can only be passed + to the function using its name as a keyword argument , and not as a + positional argument. - name: Hashable or None, optional - Name given to this array(required if unnamed). - It is a keyword-only argument. A keyword-only argument can only be passed to the - function using its name as a keyword argument , and not as a positional argument. + set_index : bool, default: False + If set_index=True, the dask DataFrame is indexed by this dataset's + coordinate. Since dask DataFrames do not support multi-indexes, + set_index only works if the dataset only contains one dimension. Returns ------- @@ -6701,42 +6709,36 @@ def to_dask_dataframe( Examples -------- - - da=xr.DataArray(np.random.rand(4,3,2), - dims=('time','lat','lon'), - coords={'time':np.arange(4), - 'lat':[-30,-20,-10], - 'lon':[120,130]}, - name='temperature', - attrs={'units':'Celsius', - 'description':'Random temperature data'}) - - da.to_dask_dataframe(['lat','lon','time'],name="temp_dataframe") - + >>> da = xr.DataArray( + ... np.arange(4 * 3 * 2).reshape(4, 3, 2), + ... dims=("time", "lat", "lon"), + ... coords={ + ... "time": np.arange(4), + ... "lat": [-30, -20, -10], + ... "lon": [120, 130], + ... }, + ... name="temperature", + ... attrs={"units": "Celsius", "description": "Random temperature data"}, + ... ) + >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time temp_dataframe npartitions=1 - 0 int64 int64 int64 float64 - 23 ... ... ... ... - Dask Name: concat-indexed,30 tasks + 0 int64 int64 int64 int64 + 23 ... ... ... ... + Dask Name: concat-indexed, 30 tasks """ - if name is None: name = self.name if name is None: raise ValueError( "Cannot convert an unnamed DataArray to a " - "dask dataframe : use the ``name`` parameter" + "dask dataframe : use the ``name`` parameter ." ) - - if self.ndim == 0: - raise ValueError("Cannot convert a scalar to a dataframe") - - tmp_dataset = Dataset({name: self}) - dask_dataframe = tmp_dataset.to_dask_dataframe(dim_order) - return dask_dataframe + ds = self._to_dataset_whole(name) + return ds.to_dask_dataframe(dim_order, set_index) # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2a9c36af32a..24eac24e9c9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3205,8 +3205,9 @@ def test_to_dataframe_0length(self) -> None: assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) + @requires_dask def test_to_dask_dataframe(self) -> None: - arr_np = np.random.randn(3, 4) + arr_np = np.arange(3 * 4).reshape(3, 4) arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo") expected = arr.to_series() actual = arr.to_dask_dataframe()["foo"] From aff8ecadbbfc4f74a2c80bd162537357da2a6de1 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sun, 19 Mar 2023 00:35:06 +0530 Subject: [PATCH 10/41] Corrections in docstring and import --- xarray/core/dataarray.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 11f1a4a01db..0ae23bb609d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -60,7 +60,7 @@ try: from dask.dataframe import DataFrame as DaskDataFrame except ImportError: - DaskDataFrame = None + DaskDataFrame = None # type: ignore try: from dask.delayed import Delayed except ImportError: @@ -6722,12 +6722,11 @@ def to_dask_dataframe( ... ) >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time temp_dataframe npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... - Dask Name: concat-indexed, 30 tasks - + 0 int64 int64 int64 int64 + 23 ... ... ... ... + Dask Name: concat-indexed, 1 graph layer """ if name is None: name = self.name From 9e4f8167bee86a97d891ee3c4452568d7cca4b91 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sun, 19 Mar 2023 01:48:54 +0530 Subject: [PATCH 11/41] docstring correction --- xarray/core/dataarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0ae23bb609d..0050cd86b96 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6727,6 +6727,7 @@ def to_dask_dataframe( 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer + """ if name is None: name = self.name From bcd06c8e650ef9db8a4908c020f59214c5962ad1 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 21 Mar 2023 22:03:29 +0530 Subject: [PATCH 12/41] Remove name parameter --- xarray/core/dataarray.py | 24 +++++------------------- xarray/tests/test_dataarray.py | 5 ----- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 0050cd86b96..68d14a4dc3d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6678,8 +6678,6 @@ def to_dask_dataframe( self, dim_order: Sequence[Hashable] | None = None, set_index: bool = False, - *, - name: Hashable | None = None, ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. @@ -6692,12 +6690,6 @@ def to_dask_dataframe( will be contiguous in the resulting DataFrame. This has a major influence on which operations are efficient on the resulting dask dataframe. - name : Hashable or None, optional - Name given to this array(required if unnamed). - It is a keyword-only argument. A keyword-only argument can only be passed - to the function using its name as a keyword argument , and not as a - positional argument. - set_index : bool, default: False If set_index=True, the dask DataFrame is indexed by this dataset's coordinate. Since dask DataFrames do not support multi-indexes, @@ -6717,27 +6709,21 @@ def to_dask_dataframe( ... "lat": [-30, -20, -10], ... "lon": [120, 130], ... }, - ... name="temperature", + ... name="eg_dataarray", ... attrs={"units": "Celsius", "description": "Random temperature data"}, ... ) - >>> da.to_dask_dataframe(["lat", "lon", "time"], name="temp_dataframe") + >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: - lat lon time temp_dataframe + lat lon time eg_dataarray npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer """ - if name is None: - name = self.name - if name is None: - raise ValueError( - "Cannot convert an unnamed DataArray to a " - "dask dataframe : use the ``name`` parameter ." - ) - ds = self._to_dataset_whole(name) + name = self.name if self.name is not None else _THIS_ARRAY + ds = self._to_dataset_whole(name, shallow_copy=False) return ds.to_dask_dataframe(dim_order, set_index) # this needs to be at the end, or mypy will confuse with `str` diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 24eac24e9c9..5b4f68b3e22 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3212,7 +3212,6 @@ def test_to_dask_dataframe(self) -> None: expected = arr.to_series() actual = arr.to_dask_dataframe()["foo"] - assert_array_equal(actual.name, expected.name) assert_array_equal(actual.values, expected.values) actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"] @@ -3235,10 +3234,6 @@ def test_to_dask_dataframe(self) -> None: with pytest.raises(ValueError, match=r"cannot convert a scalar"): arr.sel(A="c", B=2).to_dataframe() - arr.name = None - with pytest.raises(ValueError, match=r"unnamed"): - arr.to_dataframe() - def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array arr = DataArray([1, 2, 3], dims="x", name="x") From 4cca0a86502907e63862b2b083b8d181a217bfe5 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Thu, 23 Mar 2023 20:45:20 +0530 Subject: [PATCH 13/41] Corrected doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ade5fd66dc1..b3e92fc57a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -19,6 +19,7 @@ What's New v2023.04.0 (unreleased) ----------------------- +- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). New Features ~~~~~~~~~~~~ @@ -66,7 +67,6 @@ New Features By `Deepak Cherian `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. -- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). Breaking changes ~~~~~~~~~~~~~~~~ From 2c9ffb38866c8fce419adf08cbbdc9bed0235aca Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Mon, 27 Mar 2023 09:47:10 +0530 Subject: [PATCH 14/41] Update whats-new.rst --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 40301203ec8..b3e92fc57a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,7 +67,6 @@ New Features By `Deepak Cherian `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. -- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). Breaking changes ~~~~~~~~~~~~~~~~ From a8500a257162c03ba0753e1982ff4fd75bf5b2a3 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 17:33:58 +0530 Subject: [PATCH 15/41] Space corrections in docstring --- xarray/core/dataarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 68d14a4dc3d..677828d7bcc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,10 +6714,10 @@ def to_dask_dataframe( ... ) >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: - lat lon time eg_dataarray + lat lon time eg_dataarray npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... + 0 int64 int64 int64 int64 + 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer """ From 75e20b46a46b7f660c179a67d423dd2f82d123b7 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 18:11:58 +0530 Subject: [PATCH 16/41] Whitespace correction in docstring --- xarray/core/dataarray.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 677828d7bcc..4662455769a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6686,10 +6686,9 @@ def to_dask_dataframe( dim_order : Sequence of Hashable or None , optional Hierarchical dimension order for the resulting dataframe. Array content is transposed to this order and then written out as flat - vectors in contiguous order , so the last dimension in this list + vectors in contiguous order, so the last dimension in this list will be contiguous in the resulting DataFrame. This has a major influence on which operations are efficient on the resulting dask dataframe. - set_index : bool, default: False If set_index=True, the dask DataFrame is indexed by this dataset's coordinate. Since dask DataFrames do not support multi-indexes, From 65d3a937145f352117a30225cd89523194f6c4b6 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 18:37:51 +0530 Subject: [PATCH 17/41] Add white space in docstring line --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4662455769a..cb4245b2268 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From 69811fefea7057a2115f733ff8b5a6ec41e8a427 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 18:53:10 +0530 Subject: [PATCH 18/41] Whitespace correction --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index cb4245b2268..4662455769a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From 4bae82c4ee61f9bf866e97c743379b75ff8e86e2 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 20:49:19 +0530 Subject: [PATCH 19/41] Update line npartitions=1 --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4662455769a..3956cb909e9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From 3d3b09219f8ab658a71106073627cef3ef2a8b24 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 15:20:38 +0000 Subject: [PATCH 20/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3956cb909e9..4662455769a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From ee30fe4d6c67c16568602fd234e99d7c5e07573b Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 21:29:19 +0530 Subject: [PATCH 21/41] Revert "Update line npartitions=1" This reverts commit 4bae82c4ee61f9bf866e97c743379b75ff8e86e2. Reverting commit . --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3956cb909e9..4662455769a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From 6c2c5e3aa616c87f4405df3558fe1acd716bf6e1 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 22:02:00 +0530 Subject: [PATCH 22/41] Add whitespace in npartitions=1 --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4662455769a..3956cb909e9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From 71c065664c3fa3f07df43d0011d77ac08e865c23 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 16:33:01 +0000 Subject: [PATCH 23/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3956cb909e9..4662455769a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6714,7 +6714,7 @@ def to_dask_dataframe( >>> da.to_dask_dataframe(["lat", "lon", "time"]) Dask DataFrame Structure: lat lon time eg_dataarray - npartitions=1 + npartitions=1 0 int64 int64 int64 int64 23 ... ... ... ... Dask Name: concat-indexed, 1 graph layer From d89f8e0eb7f6d0303e84488801a81bdc94b3f75c Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 22:25:56 +0530 Subject: [PATCH 24/41] Change example in docstring --- xarray/core/dataarray.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3956cb909e9..3ce6e09159b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6701,23 +6701,34 @@ def to_dask_dataframe( Examples -------- >>> da = xr.DataArray( - ... np.arange(4 * 3 * 2).reshape(4, 3, 2), + ... np.arange(4 * 2 * 2).reshape(4, 2, 2), ... dims=("time", "lat", "lon"), ... coords={ ... "time": np.arange(4), - ... "lat": [-30, -20, -10], + ... "lat": [-30, -20], ... "lon": [120, 130], ... }, ... name="eg_dataarray", ... attrs={"units": "Celsius", "description": "Random temperature data"}, ... ) - >>> da.to_dask_dataframe(["lat", "lon", "time"]) - Dask DataFrame Structure: - lat lon time eg_dataarray - npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... - Dask Name: concat-indexed, 1 graph layer + >>>da.to_dask_dataframe(["lat", "lon", "time"]).compute() + lat lon time eg_dataarray + 0 -30 120 0 0 + 1 -30 120 1 4 + 2 -30 120 2 8 + 3 -30 120 3 12 + 4 -30 130 0 1 + 5 -30 130 1 5 + 6 -30 130 2 9 + 7 -30 130 3 13 + 8 -20 120 0 2 + 9 -20 120 1 6 + 10 -20 120 2 10 + 11 -20 120 3 14 + 12 -20 130 0 3 + 13 -20 130 1 7 + 14 -20 130 2 11 + 15 -20 130 3 15 """ From b54528294dcd8d8ffeb55aee48443601010d8e62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:02:57 +0000 Subject: [PATCH 25/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 116 +++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4cffeece3b4..e7c97a3819e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6681,64 +6681,64 @@ def to_dask_dataframe( ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. - Parameters - ---------- - dim_order : Sequence of Hashable or None , optional - Hierarchical dimension order for the resulting dataframe. - Array content is transposed to this order and then written out as flat - vectors in contiguous order, so the last dimension in this list - will be contiguous in the resulting DataFrame. This has a major influence - on which operations are efficient on the resulting dask dataframe. - set_index : bool, default: False - If set_index=True, the dask DataFrame is indexed by this dataset's - coordinate. Since dask DataFrames do not support multi-indexes, - set_index only works if the dataset only contains one dimension. - - Returns - ------- - dask.dataframe.DataFrame - - Examples - -------- - >>> da = xr.DataArray( - ... np.arange(4 * 2 * 2).reshape(4, 2, 2), - ... dims=("time", "lat", "lon"), - ... coords={ - ... "time": np.arange(4), - ... "lat": [-30, -20], - ... "lon": [120, 130], - ... }, - ... name="eg_dataarray", - ... attrs={"units": "Celsius", "description": "Random temperature data"}, - ... ) -<<<<<<< HEAD - >>>da.to_dask_dataframe(["lat", "lon", "time"]).compute() - lat lon time eg_dataarray - 0 -30 120 0 0 - 1 -30 120 1 4 - 2 -30 120 2 8 - 3 -30 120 3 12 - 4 -30 130 0 1 - 5 -30 130 1 5 - 6 -30 130 2 9 - 7 -30 130 3 13 - 8 -20 120 0 2 - 9 -20 120 1 6 - 10 -20 120 2 10 - 11 -20 120 3 14 - 12 -20 130 0 3 - 13 -20 130 1 7 - 14 -20 130 2 11 - 15 -20 130 3 15 -======= - >>> da.to_dask_dataframe(["lat", "lon", "time"]) - Dask DataFrame Structure: - lat lon time eg_dataarray - npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... - Dask Name: concat-indexed, 1 graph layer ->>>>>>> origin/method-dataarray-to-daskdataframe + Parameters + ---------- + dim_order : Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence + on which operations are efficient on the resulting dask dataframe. + set_index : bool, default: False + If set_index=True, the dask DataFrame is indexed by this dataset's + coordinate. Since dask DataFrames do not support multi-indexes, + set_index only works if the dataset only contains one dimension. + + Returns + ------- + dask.dataframe.DataFrame + + Examples + -------- + >>> da = xr.DataArray( + ... np.arange(4 * 2 * 2).reshape(4, 2, 2), + ... dims=("time", "lat", "lon"), + ... coords={ + ... "time": np.arange(4), + ... "lat": [-30, -20], + ... "lon": [120, 130], + ... }, + ... name="eg_dataarray", + ... attrs={"units": "Celsius", "description": "Random temperature data"}, + ... ) + <<<<<<< HEAD + >>>da.to_dask_dataframe(["lat", "lon", "time"]).compute() + lat lon time eg_dataarray + 0 -30 120 0 0 + 1 -30 120 1 4 + 2 -30 120 2 8 + 3 -30 120 3 12 + 4 -30 130 0 1 + 5 -30 130 1 5 + 6 -30 130 2 9 + 7 -30 130 3 13 + 8 -20 120 0 2 + 9 -20 120 1 6 + 10 -20 120 2 10 + 11 -20 120 3 14 + 12 -20 130 0 3 + 13 -20 130 1 7 + 14 -20 130 2 11 + 15 -20 130 3 15 + ======= + >>> da.to_dask_dataframe(["lat", "lon", "time"]) + Dask DataFrame Structure: + lat lon time eg_dataarray + npartitions=1 + 0 int64 int64 int64 int64 + 23 ... ... ... ... + Dask Name: concat-indexed, 1 graph layer + >>>>>>> origin/method-dataarray-to-daskdataframe """ From e733b0f9ae2799f7a32726412e25681df65c6b6e Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Tue, 28 Mar 2023 22:35:27 +0530 Subject: [PATCH 26/41] Change example in docstring --- xarray/core/dataarray.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4cffeece3b4..3ce6e09159b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6711,7 +6711,6 @@ def to_dask_dataframe( ... name="eg_dataarray", ... attrs={"units": "Celsius", "description": "Random temperature data"}, ... ) -<<<<<<< HEAD >>>da.to_dask_dataframe(["lat", "lon", "time"]).compute() lat lon time eg_dataarray 0 -30 120 0 0 @@ -6730,15 +6729,6 @@ def to_dask_dataframe( 13 -20 130 1 7 14 -20 130 2 11 15 -20 130 3 15 -======= - >>> da.to_dask_dataframe(["lat", "lon", "time"]) - Dask DataFrame Structure: - lat lon time eg_dataarray - npartitions=1 - 0 int64 int64 int64 int64 - 23 ... ... ... ... - Dask Name: concat-indexed, 1 graph layer ->>>>>>> origin/method-dataarray-to-daskdataframe """ From dffea4b56ca55e1804d29dea8ade74d8a1b006f1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:34:05 +0000 Subject: [PATCH 27/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 96 ++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b4437eda709..4524d674308 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6681,54 +6681,54 @@ def to_dask_dataframe( ) -> DaskDataFrame: """Convert this array into a dask.dataframe.DataFrame. - Parameters - ---------- - dim_order : Sequence of Hashable or None , optional - Hierarchical dimension order for the resulting dataframe. - Array content is transposed to this order and then written out as flat - vectors in contiguous order, so the last dimension in this list - will be contiguous in the resulting DataFrame. This has a major influence - on which operations are efficient on the resulting dask dataframe. - set_index : bool, default: False - If set_index=True, the dask DataFrame is indexed by this dataset's - coordinate. Since dask DataFrames do not support multi-indexes, - set_index only works if the dataset only contains one dimension. - - Returns - ------- - dask.dataframe.DataFrame - - Examples - -------- - >>> da = xr.DataArray( - ... np.arange(4 * 2 * 2).reshape(4, 2, 2), - ... dims=("time", "lat", "lon"), - ... coords={ - ... "time": np.arange(4), - ... "lat": [-30, -20], - ... "lon": [120, 130], - ... }, - ... name="eg_dataarray", - ... attrs={"units": "Celsius", "description": "Random temperature data"}, - ... ) - >>>da.to_dask_dataframe(["lat", "lon", "time"]).compute() - lat lon time eg_dataarray - 0 -30 120 0 0 - 1 -30 120 1 4 - 2 -30 120 2 8 - 3 -30 120 3 12 - 4 -30 130 0 1 - 5 -30 130 1 5 - 6 -30 130 2 9 - 7 -30 130 3 13 - 8 -20 120 0 2 - 9 -20 120 1 6 - 10 -20 120 2 10 - 11 -20 120 3 14 - 12 -20 130 0 3 - 13 -20 130 1 7 - 14 -20 130 2 11 - 15 -20 130 3 15 + Parameters + ---------- + dim_order : Sequence of Hashable or None , optional + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major influence + on which operations are efficient on the resulting dask dataframe. + set_index : bool, default: False + If set_index=True, the dask DataFrame is indexed by this dataset's + coordinate. Since dask DataFrames do not support multi-indexes, + set_index only works if the dataset only contains one dimension. + + Returns + ------- + dask.dataframe.DataFrame + + Examples + -------- + >>> da = xr.DataArray( + ... np.arange(4 * 2 * 2).reshape(4, 2, 2), + ... dims=("time", "lat", "lon"), + ... coords={ + ... "time": np.arange(4), + ... "lat": [-30, -20], + ... "lon": [120, 130], + ... }, + ... name="eg_dataarray", + ... attrs={"units": "Celsius", "description": "Random temperature data"}, + ... ) + >>> da.to_dask_dataframe(["lat", "lon", "time"]).compute() + lat lon time eg_dataarray + 0 -30 120 0 0 + 1 -30 120 1 4 + 2 -30 120 2 8 + 3 -30 120 3 12 + 4 -30 130 0 1 + 5 -30 130 1 5 + 6 -30 130 2 9 + 7 -30 130 3 13 + 8 -20 120 0 2 + 9 -20 120 1 6 + 10 -20 120 2 10 + 11 -20 120 3 14 + 12 -20 130 0 3 + 13 -20 130 1 7 + 14 -20 130 2 11 + 15 -20 130 3 15 """ From 40a3458f9b25b9ff58841a07c086541aca8e619c Mon Sep 17 00:00:00 2001 From: Deeksha <106010465+dsgreen2@users.noreply.github.com> Date: Tue, 4 Apr 2023 19:44:40 +0530 Subject: [PATCH 28/41] Update xarray/core/dataarray.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/core/dataarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bc95caef2c3..8b24b5bcb30 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6735,7 +6735,6 @@ def to_dask_dataframe( 13 -20 130 1 7 14 -20 130 2 11 15 -20 130 3 15 - """ name = self.name if self.name is not None else _THIS_ARRAY From 02f09d305c728463374c548090c1fcded2ed7404 Mon Sep 17 00:00:00 2001 From: Deeksha <106010465+dsgreen2@users.noreply.github.com> Date: Tue, 4 Apr 2023 19:44:52 +0530 Subject: [PATCH 29/41] Update doc/whats-new.rst Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8def0384362..d649457c0d3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -19,7 +19,7 @@ What's New v2023.04.0 (unreleased) ----------------------- -- Added new method :py:meth:`DataArray.to_dask_dataframe`,convert a dataarray into a dask dataframe (:issue:`7409`). +- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). New Features ~~~~~~~~~~~~ From c732783e5dde52fb3ce3a3744700cbf05a4fe184 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Sun, 16 Apr 2023 02:09:37 +0530 Subject: [PATCH 30/41] Add name check --- xarray/core/dataarray.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b4437eda709..a20637e9311 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6731,8 +6731,12 @@ def to_dask_dataframe( 15 -20 130 3 15 """ - - name = self.name if self.name is not None else _THIS_ARRAY + if self.name is None: + raise ValueError( + "Cannot convert an unnamed DataArray to a " + "dask dataframe : use the ``name`` parameter ." + ) + name=self.name ds = self._to_dataset_whole(name, shallow_copy=False) return ds.to_dask_dataframe(dim_order, set_index) From ab619f91ffd18c98f99bdf14a4e8d168df3a180a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Apr 2023 20:00:23 +0000 Subject: [PATCH 31/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b0296797216..6072f180649 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6946,7 +6946,7 @@ def to_dask_dataframe( "Cannot convert an unnamed DataArray to a " "dask dataframe : use the ``name`` parameter ." ) - name=self.name + name = self.name ds = self._to_dataset_whole(name, shallow_copy=False) return ds.to_dask_dataframe(dim_order, set_index) From 694ba7799de18e3fc9b25e1fb421f9bbe711266b Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Wed, 19 Apr 2023 11:01:49 +0530 Subject: [PATCH 32/41] Add test for unnamed dataarray. --- xarray/core/dataarray.py | 2 +- xarray/tests/test_dataarray.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b0296797216..d7de3135beb 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6944,7 +6944,7 @@ def to_dask_dataframe( if self.name is None: raise ValueError( "Cannot convert an unnamed DataArray to a " - "dask dataframe : use the ``name`` parameter ." + "dask dataframe : use the ``.rename`` method to assign a name." ) name=self.name ds = self._to_dataset_whole(name, shallow_copy=False) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 41e47d36ed8..eb11899ee42 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3272,7 +3272,11 @@ def test_to_dask_dataframe(self) -> None: arr.to_dask_dataframe(dim_order=["B", "A", "C"]) with pytest.raises(ValueError, match=r"cannot convert a scalar"): - arr.sel(A="c", B=2).to_dataframe() + arr.sel(A="c", B=2).to_dask_dataframe() + + arr.name = None + with pytest.raises(ValueError,match="cannot convert an unnamed dataarray."): + arr.to_dask_dataframe() def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array From 4866d8b853daf25da66c6c31c6cd2d900eba070f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 05:37:09 +0000 Subject: [PATCH 33/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index eb11899ee42..7984ea82aee 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3275,7 +3275,7 @@ def test_to_dask_dataframe(self) -> None: arr.sel(A="c", B=2).to_dask_dataframe() arr.name = None - with pytest.raises(ValueError,match="cannot convert an unnamed dataarray."): + with pytest.raises(ValueError, match="cannot convert an unnamed dataarray."): arr.to_dask_dataframe() def test_to_pandas_name_matches_coordinate(self) -> None: From c56398985e88182e80cd75a65c6ff9ea3a809ba4 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Thu, 20 Apr 2023 15:09:30 +0530 Subject: [PATCH 34/41] Remove scalar array test --- xarray/tests/test_dataarray.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index eb11899ee42..18bff625041 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3270,10 +3270,7 @@ def test_to_dask_dataframe(self) -> None: with pytest.raises(ValueError, match="does not match the set of dimensions"): arr.to_dask_dataframe(dim_order=["B", "A", "C"]) - - with pytest.raises(ValueError, match=r"cannot convert a scalar"): - arr.sel(A="c", B=2).to_dask_dataframe() - + arr.name = None with pytest.raises(ValueError,match="cannot convert an unnamed dataarray."): arr.to_dask_dataframe() From 87e993d62059bf29cf55efef4787ca2c00aad082 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Apr 2023 09:42:21 +0000 Subject: [PATCH 35/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2bf152413e1..430e76d7613 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3270,7 +3270,7 @@ def test_to_dask_dataframe(self) -> None: with pytest.raises(ValueError, match="does not match the set of dimensions"): arr.to_dask_dataframe(dim_order=["B", "A", "C"]) - + arr.name = None with pytest.raises(ValueError, match="cannot convert an unnamed dataarray."): arr.to_dask_dataframe() From b60a226720fd48ba2fb2b793fc02d47246284cb5 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Thu, 20 Apr 2023 15:56:05 +0530 Subject: [PATCH 36/41] Change error message --- xarray/tests/test_dataarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2bf152413e1..5c2725eb69a 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3270,9 +3270,9 @@ def test_to_dask_dataframe(self) -> None: with pytest.raises(ValueError, match="does not match the set of dimensions"): arr.to_dask_dataframe(dim_order=["B", "A", "C"]) - + arr.name = None - with pytest.raises(ValueError, match="cannot convert an unnamed dataarray."): + with pytest.raises(ValueError, match="Cannot convert an unnamed DataArray to a dask dataframe : use the ``.rename`` method to assign a name."): arr.to_dask_dataframe() def test_to_pandas_name_matches_coordinate(self) -> None: From 2466903563cdf6a5d5eaf218149e48096abaf4a7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Apr 2023 10:28:46 +0000 Subject: [PATCH 37/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_dataarray.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5c2725eb69a..c7f11b5bec2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3272,7 +3272,10 @@ def test_to_dask_dataframe(self) -> None: arr.to_dask_dataframe(dim_order=["B", "A", "C"]) arr.name = None - with pytest.raises(ValueError, match="Cannot convert an unnamed DataArray to a dask dataframe : use the ``.rename`` method to assign a name."): + with pytest.raises( + ValueError, + match="Cannot convert an unnamed DataArray to a dask dataframe : use the ``.rename`` method to assign a name.", + ): arr.to_dask_dataframe() def test_to_pandas_name_matches_coordinate(self) -> None: From 8bfbf3f9403c44d544d27711cdbc81b5de4612f1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 20 Apr 2023 11:58:12 -0600 Subject: [PATCH 38/41] Update xarray/tests/test_dataarray.py --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c7f11b5bec2..8c1c74634ec 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3274,7 +3274,7 @@ def test_to_dask_dataframe(self) -> None: arr.name = None with pytest.raises( ValueError, - match="Cannot convert an unnamed DataArray to a dask dataframe : use the ``.rename`` method to assign a name.", + match="Cannot convert an unnamed DataArray", ): arr.to_dask_dataframe() From 1f67c6f0dd829e1e605194c787836964377b3f06 Mon Sep 17 00:00:00 2001 From: dsgreen274 Date: Fri, 21 Apr 2023 00:03:34 +0530 Subject: [PATCH 39/41] Update whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8f4bb089122..b9e7ae92841 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -19,10 +19,10 @@ What's New v2023.05.0 (unreleased) ----------------------- -- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). New Features ~~~~~~~~~~~~ +- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). Breaking changes From e80ece0268b31adafd935353a206197625b11001 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 28 Apr 2023 08:28:53 -0600 Subject: [PATCH 40/41] Update doc/whats-new.rst --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e0ed47df022..5b74e642219 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v2023.05.0 (unreleased) New Features ~~~~~~~~~~~~ -- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). +- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). + By `Deeksha `_. Breaking changes From 7ed49adee10c0fbf8a1753a99aa654ea5c30a3de Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Apr 2023 14:30:04 +0000 Subject: [PATCH 41/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a10b72dfb38..c74134708e1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,7 @@ v2023.05.0 (unreleased) New Features ~~~~~~~~~~~~ -- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). +- Added new method :py:meth:`DataArray.to_dask_dataframe`, convert a dataarray into a dask dataframe (:issue:`7409`). By `Deeksha `_. - Add support for lshift and rshift binary operators (`<<`, `>>`) on :py:class:`xr.DataArray` of type :py:class:`int` (:issue:`7727` , :pull:`7741`).