77
88from zarr .core import Array
99from zarr .creation import array as _create_array
10- from zarr .creation import normalize_store_arg , open_array
10+ from zarr .creation import open_array
1111from zarr .errors import CopyError , PathNotFoundError
1212from zarr .hierarchy import Group
1313from zarr .hierarchy import group as _create_group
1414from zarr .hierarchy import open_group
1515from zarr .meta import json_dumps , json_loads
16- from zarr .storage import contains_array , contains_group , BaseStore
16+ from zarr .storage import contains_array , contains_group , normalize_store_arg , BaseStore
1717from zarr .util import TreeViewer , buffer_size , normalize_storage_path
1818
1919from typing import Union
2020
2121StoreLike = Union [BaseStore , MutableMapping , str , None ]
2222
2323
24+ def _check_and_update_path (store : BaseStore , path ):
25+ if getattr (store , '_store_version' , 2 ) > 2 and not path :
26+ raise ValueError ("path must be provided for v3 stores" )
27+ return normalize_storage_path (path )
28+
29+
2430# noinspection PyShadowingBuiltins
25- def open (store : StoreLike = None , mode : str = "a" , ** kwargs ):
31+ def open (store : StoreLike = None , mode : str = "a" , * , zarr_version = 2 , path = None , * *kwargs ):
2632 """Convenience function to open a group or array using file-mode-like semantics.
2733
2834 Parameters
@@ -34,6 +40,10 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
3440 read/write (must exist); 'a' means read/write (create if doesn't
3541 exist); 'w' means create (overwrite if exists); 'w-' means create
3642 (fail if exists).
43+ zarr_version : {2, 3}
44+ The zarr protocol version to use.
45+ path : str
46+ The path within the store to open.
3747 **kwargs
3848 Additional parameters are passed through to :func:`zarr.creation.open_array` or
3949 :func:`zarr.hierarchy.open_group`.
@@ -75,15 +85,16 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs):
7585
7686 """
7787
78- path = kwargs .get ('path' )
7988 # handle polymorphic store arg
8089 clobber = mode == 'w'
8190 # we pass storage options explicitly, since normalize_store_arg might construct
8291 # a store if the input is a fsspec-compatible URL
8392 _store : BaseStore = normalize_store_arg (
84- store , clobber = clobber , storage_options = kwargs .pop ("storage_options" , {})
93+ store , clobber = clobber , storage_options = kwargs .pop ("storage_options" , {}),
94+ zarr_version = zarr_version ,
8595 )
86- path = normalize_storage_path (path )
96+ path = _check_and_update_path (_store , path )
97+ kwargs ['path' ] = path
8798
8899 if mode in {'w' , 'w-' , 'x' }:
89100 if 'shape' in kwargs :
@@ -110,7 +121,7 @@ def _might_close(path):
110121 return isinstance (path , (str , os .PathLike ))
111122
112123
113- def save_array (store : StoreLike , arr , ** kwargs ):
124+ def save_array (store : StoreLike , arr , * , zarr_version = 2 , path = None , * *kwargs ):
114125 """Convenience function to save a NumPy array to the local file system, following a
115126 similar API to the NumPy save() function.
116127
@@ -120,6 +131,10 @@ def save_array(store: StoreLike, arr, **kwargs):
120131 Store or path to directory in file system or name of zip file.
121132 arr : ndarray
122133 NumPy array with data to save.
134+ zarr_version : {2, 3}
135+ The zarr protocol version to use when saving.
136+ path : str
137+ The path within the store where the array will be saved.
123138 kwargs
124139 Passed through to :func:`create`, e.g., compressor.
125140
@@ -142,16 +157,18 @@ def save_array(store: StoreLike, arr, **kwargs):
142157
143158 """
144159 may_need_closing = _might_close (store )
145- _store : BaseStore = normalize_store_arg (store , clobber = True )
160+ _store : BaseStore = normalize_store_arg (store , clobber = True , zarr_version = zarr_version )
161+ path = _check_and_update_path (_store , path )
146162 try :
147- _create_array (arr , store = _store , overwrite = True , ** kwargs )
163+ _create_array (arr , store = _store , overwrite = True , zarr_version = zarr_version , path = path ,
164+ ** kwargs )
148165 finally :
149166 if may_need_closing :
150167 # needed to ensure zip file records are written
151168 _store .close ()
152169
153170
154- def save_group (store : StoreLike , * args , ** kwargs ):
171+ def save_group (store : StoreLike , * args , zarr_version = 2 , path = None , ** kwargs ):
155172 """Convenience function to save several NumPy arrays to the local file system, following a
156173 similar API to the NumPy savez()/savez_compressed() functions.
157174
@@ -161,6 +178,10 @@ def save_group(store: StoreLike, *args, **kwargs):
161178 Store or path to directory in file system or name of zip file.
162179 args : ndarray
163180 NumPy arrays with data to save.
181+ zarr_version : {2, 3}
182+ The zarr protocol version to use when saving.
183+ path : str
184+ Path within the store where the group will be saved.
164185 kwargs
165186 NumPy arrays with data to save.
166187
@@ -213,21 +234,22 @@ def save_group(store: StoreLike, *args, **kwargs):
213234 raise ValueError ('at least one array must be provided' )
214235 # handle polymorphic store arg
215236 may_need_closing = _might_close (store )
216- _store : BaseStore = normalize_store_arg (store , clobber = True )
237+ _store : BaseStore = normalize_store_arg (store , clobber = True , zarr_version = zarr_version )
238+ path = _check_and_update_path (_store , path )
217239 try :
218- grp = _create_group (_store , overwrite = True )
240+ grp = _create_group (_store , path = path , overwrite = True , zarr_version = zarr_version )
219241 for i , arr in enumerate (args ):
220242 k = 'arr_{}' .format (i )
221- grp .create_dataset (k , data = arr , overwrite = True )
243+ grp .create_dataset (k , data = arr , overwrite = True , zarr_version = zarr_version )
222244 for k , arr in kwargs .items ():
223- grp .create_dataset (k , data = arr , overwrite = True )
245+ grp .create_dataset (k , data = arr , overwrite = True , zarr_version = zarr_version )
224246 finally :
225247 if may_need_closing :
226248 # needed to ensure zip file records are written
227249 _store .close ()
228250
229251
230- def save (store : StoreLike , * args , ** kwargs ):
252+ def save (store : StoreLike , * args , zarr_version = 2 , path = None , ** kwargs ):
231253 """Convenience function to save an array or group of arrays to the local file system.
232254
233255 Parameters
@@ -236,6 +258,10 @@ def save(store: StoreLike, *args, **kwargs):
236258 Store or path to directory in file system or name of zip file.
237259 args : ndarray
238260 NumPy arrays with data to save.
261+ zarr_version : {2, 3}
262+ The zarr protocol version to use when saving.
263+ path : str
264+ The path within the store where the array or group will be saved.
239265 kwargs
240266 NumPy arrays with data to save.
241267
@@ -302,9 +328,10 @@ def save(store: StoreLike, *args, **kwargs):
302328 if len (args ) == 0 and len (kwargs ) == 0 :
303329 raise ValueError ('at least one array must be provided' )
304330 if len (args ) == 1 and len (kwargs ) == 0 :
305- save_array (store , args [0 ])
331+ save_array (store , args [0 ], zarr_version = zarr_version , path = path )
306332 else :
307- save_group (store , * args , ** kwargs )
333+ save_group (store , * args , zarr_version = zarr_version , path = path ,
334+ ** kwargs )
308335
309336
310337class LazyLoader (Mapping ):
@@ -337,7 +364,7 @@ def __repr__(self):
337364 return r
338365
339366
340- def load (store : StoreLike ):
367+ def load (store : StoreLike , zarr_version = 2 , path = None ):
341368 """Load data from an array or group into memory.
342369
343370 Parameters
@@ -363,11 +390,12 @@ def load(store: StoreLike):
363390
364391 """
365392 # handle polymorphic store arg
366- _store = normalize_store_arg (store )
367- if contains_array (_store , path = None ):
368- return Array (store = _store , path = None )[...]
369- elif contains_group (_store , path = None ):
370- grp = Group (store = _store , path = None )
393+ _store = normalize_store_arg (store , zarr_version = zarr_version )
394+ path = _check_and_update_path (_store , path )
395+ if contains_array (_store , path = path ):
396+ return Array (store = _store , path = path )[...]
397+ elif contains_group (_store , path = path ):
398+ grp = Group (store = _store , path = path )
371399 return LazyLoader (grp )
372400
373401
@@ -601,59 +629,79 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
601629 # setup counting variables
602630 n_copied = n_skipped = n_bytes_copied = 0
603631
632+ source_store_version = getattr (source , '_store_version' , 2 )
633+ dest_store_version = getattr (dest , '_store_version' , 2 )
634+ if source_store_version != dest_store_version :
635+ raise ValueError ("zarr stores must share the same protocol version" )
636+ if source_store_version > 2 :
637+ if not source_path or not dest_path :
638+ raise ValueError ("v3 stores require specifying a non-empty "
639+ "source_path and dest_path" )
640+
604641 # setup logging
605642 with _LogWriter (log ) as log :
606643
607644 # iterate over source keys
608645 for source_key in sorted (source .keys ()):
609646
610647 # filter to keys under source path
611- if source_key .startswith (source_path ):
648+ if source_store_version == 2 :
649+ if not source_key .startswith (source_path ):
650+ continue
651+ elif source_store_version == 3 :
652+ # 'meta/root/' or 'data/root/' have length 10
653+ if not source_key [10 :].startswith (source_path ):
654+ continue
612655
613- # process excludes and includes
614- exclude = False
615- for prog in excludes :
656+ # process excludes and includes
657+ exclude = False
658+ for prog in excludes :
659+ if prog .search (source_key ):
660+ exclude = True
661+ break
662+ if exclude :
663+ for prog in includes :
616664 if prog .search (source_key ):
617- exclude = True
665+ exclude = False
618666 break
619- if exclude :
620- for prog in includes :
621- if prog .search (source_key ):
622- exclude = False
623- break
624- if exclude :
625- continue
667+ if exclude :
668+ continue
626669
627- # map key to destination path
670+ # map key to destination path
671+ if source_store_version == 2 :
628672 key_suffix = source_key [len (source_path ):]
629673 dest_key = dest_path + key_suffix
630-
631- # create a descriptive label for this operation
632- descr = source_key
633- if dest_key != source_key :
634- descr = descr + ' -> ' + dest_key
635-
636- # decide what to do
637- do_copy = True
638- if if_exists != 'replace' :
639- if dest_key in dest :
640- if if_exists == 'raise' :
641- raise CopyError ('key {!r} exists in destination'
642- .format (dest_key ))
643- elif if_exists == 'skip' :
644- do_copy = False
645-
646- # take action
647- if do_copy :
648- log ('copy {}' .format (descr ))
649- if not dry_run :
650- data = source [source_key ]
651- n_bytes_copied += buffer_size (data )
652- dest [dest_key ] = data
653- n_copied += 1
654- else :
655- log ('skip {}' .format (descr ))
656- n_skipped += 1
674+ elif source_store_version == 3 :
675+ # 10 is length of 'meta/root/' or 'data/root/'
676+ key_suffix = source_key [10 + len (source_path ):]
677+ dest_key = source_key [:10 ] + dest_path + key_suffix
678+
679+ # create a descriptive label for this operation
680+ descr = source_key
681+ if dest_key != source_key :
682+ descr = descr + ' -> ' + dest_key
683+
684+ # decide what to do
685+ do_copy = True
686+ if if_exists != 'replace' :
687+ if dest_key in dest :
688+ if if_exists == 'raise' :
689+ raise CopyError ('key {!r} exists in destination'
690+ .format (dest_key ))
691+ elif if_exists == 'skip' :
692+ do_copy = False
693+
694+ # take action
695+ if do_copy :
696+ log ('copy {}' .format (descr ))
697+ if not dry_run :
698+ data = source [source_key ]
699+ n_bytes_copied += buffer_size (data )
700+ dest [dest_key ] = data
701+ n_copied += 1
702+ else :
703+ log ('skip {}' .format (descr ))
704+ n_skipped += 1
657705
658706 # log a final message with a summary of what happened
659707 _log_copy_summary (log , dry_run , n_copied , n_skipped , n_bytes_copied )
@@ -908,7 +956,15 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists,
908956
909957 # copy attributes
910958 if not without_attrs :
911- ds .attrs .update (source .attrs )
959+ if dest_h5py and 'filters' in source .attrs :
960+ # No filters key in v3 metadata so it was stored in the
961+ # attributes instead. We cannot copy this key to
962+ # HDF5 attrs, though!
963+ source_attrs = source .attrs .asdict ().copy ()
964+ source_attrs .pop ('filters' , None )
965+ else :
966+ source_attrs = source .attrs
967+ ds .attrs .update (source_attrs )
912968
913969 n_copied += 1
914970
@@ -1064,6 +1120,8 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
10641120 # setup counting variables
10651121 n_copied = n_skipped = n_bytes_copied = 0
10661122
1123+ zarr_version = getattr (source , '_version' , 2 )
1124+
10671125 # setup logging
10681126 with _LogWriter (log ) as log :
10691127
@@ -1075,15 +1133,16 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None,
10751133 n_copied += c
10761134 n_skipped += s
10771135 n_bytes_copied += b
1078- dest .attrs .update (** source .attrs )
1136+ if zarr_version == 2 :
1137+ dest .attrs .update (** source .attrs )
10791138
10801139 # log a final message with a summary of what happened
10811140 _log_copy_summary (log , dry_run , n_copied , n_skipped , n_bytes_copied )
10821141
10831142 return n_copied , n_skipped , n_bytes_copied
10841143
10851144
1086- def consolidate_metadata (store : StoreLike , metadata_key = ".zmetadata" ):
1145+ def consolidate_metadata (store : BaseStore , metadata_key = ".zmetadata" ):
10871146 """
10881147 Consolidate all metadata for groups and arrays within the given store
10891148 into a single resource and put it under the given key.
0 commit comments