4040import warnings
4141import zipfile
4242
43+ from pandas .util ._decorators import doc
4344from pandas ._typing import (
4445 BaseBuffer ,
4546 CompressionDict ,
5455from pandas .util ._exceptions import find_stack_level
5556
5657from pandas .core .dtypes .common import is_file_like
58+ from pandas .core import generic
5759
5860_VALID_URLS = set (uses_relative + uses_netloc + uses_params )
5961_VALID_URLS .discard ("" )
6062
6163BaseBufferT = TypeVar ("BaseBufferT" , bound = BaseBuffer )
6264
63- # For the _is_binary_mode, we need to get python-zstandard's reader class because
64- # it doesn't use any of the builtin base classes (such as RawIOBase).
65- # Unfortunately python-zstandard doesn't expose that particular class, so we have
66- # to get it through `zstd.open`.
67- try :
68- with import_optional_dependency ("zstandard" ).open (io .BytesIO ()) as reader :
69- _ZstdDecompressorReader : type | None = type (reader )
70- except ImportError :
71- _ZstdDecompressorReader = None
72-
7365
7466@dataclasses .dataclass
7567class IOArgs :
@@ -257,6 +249,7 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
257249 )
258250
259251
252+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "filepath_or_buffer" )
260253def _get_filepath_or_buffer (
261254 filepath_or_buffer : FilePath | BaseBuffer ,
262255 encoding : str = "utf-8" ,
@@ -272,7 +265,7 @@ def _get_filepath_or_buffer(
272265 ----------
273266 filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
274267 or buffer
275- compression : {{'gzip', 'bz2', 'zip', 'xz', 'zstd', None}}, optional
268+ {compression_options}
276269 encoding : the encoding to use to decode bytes, default is 'utf-8'
277270 mode : str, optional
278271
@@ -499,6 +492,7 @@ def get_compression_method(
499492 return compression_method , compression_args
500493
501494
495+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "filepath_or_buffer" )
502496def infer_compression (
503497 filepath_or_buffer : FilePath | BaseBuffer , compression : str | None
504498) -> str | None :
@@ -512,10 +506,7 @@ def infer_compression(
512506 ----------
513507 filepath_or_buffer : str or file handle
514508 File path or object.
515- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
516- If 'infer' and `filepath_or_buffer` is path-like, then detect
517- compression from the following extensions: '.gz', '.bz2', '.zip',
518- '.xz', or '.zst' (otherwise no compression).
509+ {compression_options}
519510
520511 Returns
521512 -------
@@ -603,6 +594,7 @@ def get_handle(
603594 ...
604595
605596
597+ @doc (compression_options = generic ._shared_docs ["compression_options" ] % "path_or_buf" )
606598def get_handle (
607599 path_or_buf : FilePath | BaseBuffer ,
608600 mode : str ,
@@ -625,15 +617,7 @@ def get_handle(
625617 Mode to open path_or_buf with.
626618 encoding : str or None
627619 Encoding to use.
628- compression : str or dict, default None
629- If string, specifies compression mode. If dict, value at key 'method'
630- specifies compression mode. Compression mode must be one of {'infer',
631- 'gzip', 'bz2', 'zip', 'xz', 'zstd', None}. If compression mode is
632- 'infer' and `filepath_or_buffer` is path-like, then detect compression
633- from the following extensions: '.gz', '.bz2', '.zip', '.xz', or '.zst'
634- (otherwise no compression). If dict and compression mode is one of
635- {'zip', 'gzip', 'bz2', 'zstd'}, or inferred as one of the above,
636- other entries passed as additional compression options.
620+ {compression_options}
637621
638622 .. versionchanged:: 1.0.0
639623
@@ -1117,11 +1101,23 @@ def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:
11171101 if issubclass (type (handle ), text_classes ):
11181102 return False
11191103
1120- # classes that expect bytes
1121- binary_classes : list [type ] = [BufferedIOBase , RawIOBase ]
1122- # Zstandard doesn't use any of the builtin base classes
1123- if _ZstdDecompressorReader is not None :
1124- binary_classes .append (_ZstdDecompressorReader )
1125- is_binary_class = isinstance (handle , tuple (binary_classes ))
1104+ return isinstance (handle , _get_binary_io_classes ()) or "b" in getattr (
1105+ handle , "mode" , mode
1106+ )
1107+
1108+
def _get_binary_io_classes() -> tuple[type, ...]:
    """
    Return the IO classes whose handles expect bytes.

    Returns
    -------
    tuple of type
        ``BufferedIOBase`` and ``RawIOBase``, followed by the python-zstandard
        decompression reader class when ``zstandard`` can be imported. The
        tuple can be passed directly as the second argument of ``isinstance``.
    """
    # Variable-length tuple: it holds two builtin base classes and, optionally,
    # the zstandard reader class appended below.
    binary_classes: tuple[type, ...] = (BufferedIOBase, RawIOBase)

    # python-zstandard doesn't use any of the builtin base classes; instead we
    # have to use the `zstd.ZstdDecompressionReader` class for isinstance checks.
    # Unfortunately `zstd.ZstdDecompressionReader` isn't exposed by python-zstandard
    # so we have to get it from a `zstd.ZstdDecompressor` instance.
    # See also https://github.com/indygreg/python-zstandard/pull/165.
    zstd = import_optional_dependency("zstandard", errors="ignore")
    if zstd is not None:
        # Open a throwaway reader over empty input only to learn its class.
        with zstd.ZstdDecompressor().stream_reader(b"") as reader:
            binary_classes += (type(reader),)

    return binary_classes
0 commit comments