@@ -241,7 +241,7 @@ def _get_filepath_or_buffer(
241241 ----------
242242 filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
243243 or buffer
244- compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
244+ compression : {{'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, optional
245245 encoding : the encoding to use to decode bytes, default is 'utf-8'
246246 mode : str, optional
247247
@@ -420,7 +420,7 @@ def file_path_to_url(path: str) -> str:
420420 return urljoin ("file:" , pathname2url (path ))
421421
422422
423- _compression_to_extension = {"gzip" : ".gz" , "bz2" : ".bz2" , "zip" : ".zip" , "xz" : ".xz" }
423+ _compression_to_extension = {"gzip" : ".gz" , "bz2" : ".bz2" , "zip" : ".zip" , "xz" : ".xz" , "zstd" : ".zst" }
424424
425425
426426def get_compression_method (
@@ -471,10 +471,10 @@ def infer_compression(
471471 ----------
472472 filepath_or_buffer : str or file handle
473473 File path or object.
474- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
474+ compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
475475 If 'infer' and `filepath_or_buffer` is path-like, then detect
476476 compression from the following extensions: '.gz', '.bz2', '.zip',
477- or '.xz ' (otherwise no compression).
477+ '.xz', or '.zst' (otherwise no compression).
478478
479479 Returns
480480 -------
@@ -556,11 +556,11 @@ def get_handle(
556556 compression : str or dict, default None
557557 If string, specifies compression mode. If dict, value at key 'method'
558558 specifies compression mode. Compression mode must be one of {'infer',
559- 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer'
560- and `filepath_or_buffer` is path-like, then detect compression from
561- the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise
562- no compression). If dict and compression mode is one of
563- {'zip', 'gzip', 'bz2'}, or inferred as one of the above,
559+ 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}. If compression mode is
560+ 'infer' and `filepath_or_buffer` is path-like, then detect compression
561+ from the following extensions: '.gz', '.bz2', '.zip', '.xz', or '.zst'
562+ (otherwise no compression). If dict and compression mode is one of
563+ {'zip', 'gzip', 'bz2', 'zstd'}, or inferred as one of the above,
564564 other entries passed as additional compression options.
565565
566566 .. versionchanged:: 1.0.0
@@ -572,7 +572,7 @@ def get_handle(
572572 .. versionchanged:: 1.1.0
573573
574574 Passing compression options as keys in dict is now
575- supported for compression modes 'gzip' and 'bz2' as well as 'zip'.
575+ supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'.
576576
577577 memory_map : bool, default False
578578 See parsers._parser_params for more information.
@@ -689,6 +689,23 @@ def get_handle(
689689 elif compression == "xz" :
690690 handle = get_lzma_file (lzma )(handle , ioargs .mode )
691691
692+ # Zstd Compression
693+ elif compression == "zstd" :
694+ zstd = import_optional_dependency ("zstandard" )
695+ open_args = {
696+ arg : compression_args .pop (arg , None )
697+ for arg in ["encoding" , "errors" , "newline" ]
698+ }
699+ if "r" in ioargs .mode :
700+ open_args ["cctx" ] = zstd .ZstdDecompressor (** compression_args )
701+ else :
702+ open_args ["dctx" ] = zstd .ZstdCompressor (** compression_args )
703+ handle = zstd .open (
704+ handle ,
705+ mode = ioargs .mode ,
706+ ** open_args ,
707+ )
708+
692709 # Unrecognized Compression
693710 else :
694711 msg = f"Unrecognized compression type: { compression } "
0 commit comments