diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index dd5409a..8a71896 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -35,4 +35,4 @@ jobs:
       - name: Run Tests
         shell: bash -l {0}
         run: |
-          pytest --verbose --ignore=intake_xarray/tests/test_network.py
+          pytest --verbose -s --ignore=intake_xarray/tests/test_network.py
diff --git a/ci/environment-upstream.yml b/ci/environment-upstream.yml
index 63f0833..48727a3 100644
--- a/ci/environment-upstream.yml
+++ b/ci/environment-upstream.yml
@@ -2,7 +2,7 @@ name: test_env
 channels:
   - conda-forge
 dependencies:
-  - python
+  - python==3.9
   - aiohttp
   - boto3
   - flask
@@ -12,12 +12,13 @@ dependencies:
   - pydap
   - pytest
   - rasterio
-  - s3fs
+  - tornado
   - scikit-image
   - zarr
+  - s3fs
+  - moto
   - pip:
-    - git+https://github.com/intake/filesystem_spec.git
+    - git+https://github.com/fsspec/filesystem_spec.git
     - git+https://github.com/intake/intake.git
     - git+https://github.com/pydata/xarray.git
     - rangehttpserver
-    - moto[s3]==1
diff --git a/intake_xarray/tests/test_remote.py b/intake_xarray/tests/test_remote.py
index d910a63..6232884 100644
--- a/intake_xarray/tests/test_remote.py
+++ b/intake_xarray/tests/test_remote.py
@@ -23,6 +23,7 @@ def data_server():
     pwd = os.getcwd()
     os.chdir(DIRECTORY)
     command = ['python', '-m', 'RangeHTTPServer']
+    success = False
    try:
         P = subprocess.Popen(command)
         timeout = 10
@@ -34,11 +35,14 @@
                 time.sleep(0.1)
                 timeout -= 0.1
                 assert timeout > 0
+        success = True
         yield 'http://localhost:8000'
     finally:
         os.chdir(pwd)
         P.terminate()
-        P.communicate()
+        out = P.communicate()
+        if not success:
+            print(out)
 
 
 def test_http_server_files(data_server):
diff --git a/intake_xarray/xarray_container.py b/intake_xarray/xarray_container.py
index 9e24018..a3e6979 100644
--- a/intake_xarray/xarray_container.py
+++ b/intake_xarray/xarray_container.py
@@ -32,7 +32,7 @@ def serialize_zarr_ds(ds):
     try:
         attrs = ds.attrs.copy()
         ds.attrs.pop('_ARRAY_DIMENSIONS', None)  # zarr implementation detail
-        ds.to_zarr(store=s, chunk_store={}, compute=False)
+        ds.to_zarr(store=s, chunk_store={}, compute=False, consolidated=False)
     finally:
         ds.attrs = attrs
     return s
diff --git a/intake_xarray/xzarr.py b/intake_xarray/xzarr.py
index 8fb1d41..58ace7e 100644
--- a/intake_xarray/xzarr.py
+++ b/intake_xarray/xzarr.py
@@ -4,6 +4,13 @@
 class ZarrSource(DataSourceMixin):
     """Open a xarray dataset.
 
+    If the path is passed as a list or a string containing "*", then multifile open
+    will be called automatically.
+
+    Note that the implicit default value of the ``chunks`` kwarg is ``{}``, i.e., dask
+    will be used to open the dataset with the chunksize inherent in the file. To bypass
+    dask (if you only want to use ``.read()``), use ``chunks=None``.
+
     Parameters
     ----------
     urlpath: str
@@ -12,7 +19,7 @@ class ZarrSource(DataSourceMixin):
     storage_options: dict
         Parameters passed to the backend file-system
     kwargs:
-        Further parameters are passed to xr.open_zarr
+        Further parameters are passed to xarray
     """
 
     name = 'zarr'
@@ -25,13 +32,16 @@ def __init__(self, urlpath, storage_options=None, metadata=None, **kwargs):
 
     def _open_dataset(self):
         import xarray as xr
-        from fsspec import get_mapper
-
-        self._mapper = get_mapper(self.urlpath, **self.storage_options)
         kw = self.kwargs.copy()
         if "consolidated" not in kw:
             kw['consolidated'] = False
-        self._ds = xr.open_zarr(self._mapper, **kw)
+        if "chunks" not in kw:
+            kw["chunks"] = {}
+        kw["engine"] = "zarr"
+        if isinstance(self.urlpath, list) or "*" in self.urlpath:
+            self._ds = xr.open_mfdataset(self.urlpath, **kw)
+        else:
+            self._ds = xr.open_dataset(self.urlpath, **kw)
 
     def close(self):
         super(ZarrSource, self).close()
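
With this patch, ZarrSource dispatches on the shape of urlpath: a single store path goes through xr.open_dataset(..., engine="zarr"), while a list or a "*" glob goes through xr.open_mfdataset, and chunks defaults to {} so the result is dask-backed. A minimal usage sketch, assuming hypothetical store paths and the to_dask()/read() methods that intake_xarray's DataSourceMixin provides:

from intake_xarray.xzarr import ZarrSource

# Single zarr store ("data.zarr" is a hypothetical local path): opened via
# xr.open_dataset(..., engine="zarr"); chunks defaults to {}, so the arrays
# come back dask-backed with the chunking stored in the file.
lazy = ZarrSource("data.zarr").to_dask()

# chunks=None bypasses dask entirely; use this when only .read() is needed.
eager = ZarrSource("data.zarr", chunks=None).read()

# A list of paths, or a path containing "*", triggers xr.open_mfdataset.
multi = ZarrSource("stores/*.zarr").to_dask()  # hypothetical glob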