Skip to content

Unable to store dask array from NetCDF to zarr using DirectoryStore #277

@jacobtomlinson

Description

@jacobtomlinson

I'm trying to load a NetCDF file using Iris and then save the dask array out using zarr.

The file is from the Met Office MOGREPS open dataset and you can download it here.

In the example below I am loading the file with Iris, extracting the dask array, and then trying to store it. However, I get a traceback complaining that one of the chunk files is missing. When I explore the store directory with bash, the chunk file does in fact exist.

Trying the same experiment with a random dask array of the same shape and chunking succeeds.

Minimal, reproducible code sample, a copy-pastable example if possible

import iris
import zarr

cube = iris.load_cube(
    "/tmp/20160101_00_mogreps-g_wet_bulb_potential_temperature.nc", 
    "wet_bulb_potential_temperature")

dask_array = cube.lazy_data()

store = zarr.DirectoryStore('/tmp/20160101_00_mogreps-g_wet_bulb_potential_temperature.zarr')

z = zarr.empty(
    shape=dask_array.shape, 
    chunks=True, 
    dtype=dask_array.dtype, 
    store=store, 
    compression=None)

dask_array.store(z, lock=False)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-8-87c7142a4252> in <module>()
----> 1 dask_array.store(z, lock=False)

/opt/conda/lib/python3.6/site-packages/dask/array/core.py in store(self, target, **kwargs)
   1203     @wraps(store)
   1204     def store(self, target, **kwargs):
-> 1205         return store([self], [target], **kwargs)
   1206 
   1207     def to_hdf5(self, filename, datapath, **kwargs):

/opt/conda/lib/python3.6/site-packages/dask/array/core.py in store(sources, targets, lock, regions, compute, return_stored, **kwargs)
    951 
    952         if compute:
--> 953             result.compute()
    954             return None
    955         else:

/opt/conda/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    141         dask.base.compute
    142         """
--> 143         (result,) = compute(self, traverse=False, **kwargs)
    144         return result
    145 

/opt/conda/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    390     postcomputes = [a.__dask_postcompute__() if is_dask_collection(a)
    391                     else (None, a) for a in args]
--> 392     results = get(dsk, keys, **kwargs)
    393     results_iter = iter(results)
    394     return tuple(a if f is None else f(next(results_iter), *a)

/opt/conda/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, **kwargs)
     73     results = get_async(pool.apply_async, len(pool._pool), dsk, result,
     74                         cache=cache, get_id=_thread_get_id,
---> 75                         pack_exception=pack_exception, **kwargs)
     76 
     77     # Cleanup pools associated to dead threads

/opt/conda/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    519                         _execute_task(task, data)  # Re-execute locally
    520                     else:
--> 521                         raise_exception(exc, tb)
    522                 res, worker_id = loads(res_info)
    523                 state['cache'][key] = res

/opt/conda/lib/python3.6/site-packages/dask/compatibility.py in reraise(exc, tb)
     65         if exc.__traceback__ is not tb:
     66             raise exc.with_traceback(tb)
---> 67         raise exc
     68 
     69 else:

/opt/conda/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    288     try:
    289         task, data = loads(task_info)
--> 290         result = _execute_task(task, data)
    291         id = get_id()
    292         result = dumps((result, id))

/opt/conda/lib/python3.6/site-packages/dask/local.py in _execute_task(arg, cache, dsk)
    269         func, args = arg[0], arg[1:]
    270         args2 = [_execute_task(a, cache) for a in args]
--> 271         return func(*args2)
    272     elif not ishashable(arg):
    273         return arg

/opt/conda/lib/python3.6/site-packages/dask/array/core.py in store_chunk(x, out, index, lock, return_stored)
   2671 
   2672 def store_chunk(x, out, index, lock, return_stored):
-> 2673     return load_store_chunk(x, out, index, lock, return_stored, False)
   2674 
   2675 

/opt/conda/lib/python3.6/site-packages/dask/array/core.py in load_store_chunk(x, out, index, lock, return_stored, load_stored)
   2660     try:
   2661         if x is not None:
-> 2662             out[index] = np.asanyarray(x)
   2663         if return_stored and load_stored:
   2664             result = out[index]

/opt/conda/lib/python3.6/site-packages/zarr/core.py in __setitem__(self, selection, value)
   1100 
   1101         fields, selection = pop_fields(selection)
-> 1102         self.set_basic_selection(selection, value, fields=fields)
   1103 
   1104     def set_basic_selection(self, selection, value, fields=None):

/opt/conda/lib/python3.6/site-packages/zarr/core.py in set_basic_selection(self, selection, value, fields)
   1195             return self._set_basic_selection_zd(selection, value, fields=fields)
   1196         else:
-> 1197             return self._set_basic_selection_nd(selection, value, fields=fields)
   1198 
   1199     def set_orthogonal_selection(self, selection, value, fields=None):

/opt/conda/lib/python3.6/site-packages/zarr/core.py in _set_basic_selection_nd(self, selection, value, fields)
   1486         indexer = BasicIndexer(selection, self)
   1487 
-> 1488         self._set_selection(indexer, value, fields=fields)
   1489 
   1490     def _set_selection(self, indexer, value, fields=None):

/opt/conda/lib/python3.6/site-packages/zarr/core.py in _set_selection(self, indexer, value, fields)
   1534 
   1535             # put data
-> 1536             self._chunk_setitem(chunk_coords, chunk_selection, chunk_value, fields=fields)
   1537 
   1538     def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,

/opt/conda/lib/python3.6/site-packages/zarr/core.py in _chunk_setitem(self, chunk_coords, chunk_selection, value, fields)
   1642         with lock:
   1643             self._chunk_setitem_nosync(chunk_coords, chunk_selection, value,
-> 1644                                        fields=fields)
   1645 
   1646     def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None):

/opt/conda/lib/python3.6/site-packages/zarr/core.py in _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields)
   1719 
   1720         # store
-> 1721         self.chunk_store[ckey] = cdata
   1722 
   1723     def _chunk_key(self, chunk_coords):

/opt/conda/lib/python3.6/site-packages/zarr/storage.py in __setitem__(self, key, value)
    772             # move temporary file into place
    773             if os.path.exists(file_path):
--> 774                 os.remove(file_path)
    775             os.rename(temp_path, file_path)
    776 

FileNotFoundError: [Errno 2] No such file or directory: '/tmp/20160101_00_mogreps-g_wet_bulb_potential_temperature.zarr/4.1.0.1.0'

Version and installation information

Please provide the following:

  • Value of zarr.__version__ : '2.2.0'
  • Value of numcodecs.__version__: Not installed
  • Version of Python interpreter: 3.6.3
  • Operating system (Linux/Windows/Mac): linux
  • How Zarr was installed (e.g., "using pip into virtual environment", or "using conda"): conda

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions