From 370213e7f76a9792b6f08b5074061b33f3a747f5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 9 Sep 2020 11:09:18 +0200 Subject: [PATCH 1/9] ARROW-9645: [Python] Deprecate pyarrow.filesystem in favor of pyarrow.fs --- python/pyarrow/__init__.py | 41 ++++++++++++++++++++++--- python/pyarrow/filesystem.py | 25 ++++++++++++--- python/pyarrow/tests/test_filesystem.py | 20 ++++++++++++ python/pyarrow/tests/test_parquet.py | 40 ++++++++++++------------ python/pyarrow/util.py | 3 +- 5 files changed, 99 insertions(+), 30 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 9e22cc013f8..a229d798eb2 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -32,6 +32,7 @@ import gc as _gc import os as _os import sys as _sys +import warnings as _warnings try: from ._generated_version import version as __version__ @@ -190,23 +191,53 @@ def show_versions(): SerializationCallbackError, DeserializationCallbackError) -from pyarrow.filesystem import FileSystem, LocalFileSystem - from pyarrow.hdfs import HadoopFileSystem import pyarrow.hdfs as hdfs from pyarrow.ipc import serialize_pandas, deserialize_pandas import pyarrow.ipc as ipc - -localfs = LocalFileSystem.get_instance() - from pyarrow.serialization import (default_serialization_context, register_default_serialization_handlers, register_torch_serialization_handlers) import pyarrow.types as types + +# deprecated filesystems + +from pyarrow.filesystem import FileSystem as _FileSystem, LocalFileSystem as _LocalFileSystem + +_localfs = _LocalFileSystem._get_instance() + + +_msg = "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead." + + +if _sys.version_info >= (3, 7): + def __getattr__(name): + if name == "localfs": + _warnings.warn(_msg.format("localfs", "LocalFileSystem"), + DeprecationWarning, stacklevel=2) + return _localfs + elif name == "FileSystem": + _warnings.warn(_msg.format("FileSystem", "FileSystem"), + DeprecationWarning, stacklevel=2) + return _FileSystem + elif name == "LocalFileSystem": + _warnings.warn(_msg.format("LocalFileSystem", "LocalFileSystem"), + DeprecationWarning, stacklevel=2) + return _LocalFileSystem + + raise AttributeError( + "module 'pyarrow' has no attribute '{0}'".format(name) + ) +else: + localfs = _localfs + FileSystem = _FileSystem + LocalFileSystem = _LocalFileSystem + + # Entry point for starting the plasma store diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index bc4c471515d..c36dc115e41 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -20,6 +20,7 @@ import inspect import posixpath import urllib.parse +import warnings from os.path import join as pjoin @@ -237,12 +238,28 @@ class LocalFileSystem(FileSystem): _instance = None + def __init__(self): + warnings.warn( + "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " + "please use pyarrow.fs.LocalFileSystem instead", + DeprecationWarning, stacklevel=2) + super().__init__() + @classmethod - def get_instance(cls): + def _get_instance(cls): if cls._instance is None: - cls._instance = LocalFileSystem() + with warnings.catch_warnings(): + cls._instance = LocalFileSystem() return cls._instance + @classmethod + def get_instance(cls): + warnings.warn( + "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " + "please use pyarrow.fs.LocalFileSystem instead", + DeprecationWarning, stacklevel=2) + return cls._get_instance() + @implements(FileSystem.ls) def ls(self, path): path = _stringify_path(path) @@ 
-480,11 +497,11 @@ def resolve_filesystem_and_path(where, filesystem=None): fs_path = parsed_uri.path elif parsed_uri.scheme == 'file': # Input is local URI such as file:///home/user/myfile.parquet - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() fs_path = parsed_uri.path else: # Input is local path such as /home/user/myfile.parquet - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() fs_path = path return fs, fs_path diff --git a/python/pyarrow/tests/test_filesystem.py b/python/pyarrow/tests/test_filesystem.py index 4a6606ff51a..29388953c41 100644 --- a/python/pyarrow/tests/test_filesystem.py +++ b/python/pyarrow/tests/test_filesystem.py @@ -15,8 +15,28 @@ # specific language governing permissions and limitations # under the License. +import pyarrow as pa from pyarrow import filesystem +import pytest + + +def test_filesystem_deprecated(): + with pytest.warns(DeprecationWarning): + filesystem.LocalFileSystem() + + with pytest.warns(DeprecationWarning): + filesystem.LocalFileSystem.get_instance() + + with pytest.warns(DeprecationWarning): + pa.localfs + + with pytest.warns(DeprecationWarning): + pa.FileSystem + + with pytest.warns(DeprecationWarning): + pa.LocalFileSystem + def test_resolve_uri(): uri = "file:///home/user/myfile.parquet" diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index b2026f88599..48501e86274 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -1712,13 +1712,13 @@ def test_partition_set_dictionary_type(): @pytest.mark.pandas @parametrize_legacy_dataset def test_read_partitioned_directory(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset) @pytest.mark.pandas def test_create_parquet_dataset_multi_threaded(tempdir): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir _partition_test_for_filesystem(fs, base_path) @@ -1738,7 +1738,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir): def test_read_partitioned_columns_selection(tempdir, use_legacy_dataset): # ARROW-3861 - do not include partition columns in resulting table when # `columns` keyword was passed without those columns - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir _partition_test_for_filesystem(fs, base_path) @@ -1757,7 +1757,7 @@ def test_read_partitioned_columns_selection(tempdir, use_legacy_dataset): @pytest.mark.pandas @parametrize_legacy_dataset def test_filters_equivalency(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1] @@ -1845,7 +1845,7 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): @pytest.mark.pandas @parametrize_legacy_dataset def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1, 2, 3, 4] @@ -1887,7 +1887,7 @@ def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): reason='Loss of type information in creation of categoricals.' 
) def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir date_keys = [ @@ -1932,7 +1932,7 @@ def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): @pytest.mark.pandas @parametrize_legacy_dataset def test_filters_inclusive_integer(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1, 2, 3, 4] @@ -1968,7 +1968,7 @@ def test_filters_inclusive_integer(tempdir, use_legacy_dataset): @pytest.mark.pandas @parametrize_legacy_dataset def test_filters_inclusive_set(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1] @@ -2006,7 +2006,7 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): @pytest.mark.pandas @parametrize_legacy_dataset def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1, 2, 3, 4] @@ -2054,7 +2054,7 @@ def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): def test_filters_invalid_column(tempdir, use_legacy_dataset): # ARROW-5572 - raise error on invalid name in filter specification # works with new dataset / xfail with legacy implementation - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1, 2, 3, 4] @@ -2079,7 +2079,7 @@ def test_filters_invalid_column(tempdir, use_legacy_dataset): @parametrize_legacy_dataset def test_filters_read_table(tempdir, use_legacy_dataset): # test that filters keyword is passed through in read_table - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir integer_keys = [0, 1, 2, 3, 4] @@ -2116,7 +2116,7 @@ def test_filters_read_table(tempdir, use_legacy_dataset): def test_partition_keys_with_underscores(tempdir, use_legacy_dataset): # ARROW-5666 - partition field values with underscores preserve underscores # xfail with legacy dataset -> they get interpreted as integers - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() base_path = tempdir string_keys = ["2019_2", "2019_3"] @@ -2311,13 +2311,13 @@ def _test_read_common_metadata_files(fs, base_path): @pytest.mark.pandas def test_read_common_metadata_files(tempdir): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() _test_read_common_metadata_files(fs, tempdir) @pytest.mark.pandas def test_read_metadata_files(tempdir): - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() N = 100 df = pd.DataFrame({ @@ -2426,7 +2426,7 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): result2 = read_multiple_files(paths, metadata=metadata) assert result2.equals(expected) - result3 = pa.localfs.read_parquet(dirpath, schema=metadata.schema) + result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read() assert result3.equals(expected) else: with pytest.raises(ValueError, match="no longer supported"): @@ -2436,14 +2436,14 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): to_read = [0, 2, 6, result.num_columns - 1] col_names = [result.field(i).name for i in to_read] - out = pa.localfs.read_parquet(dirpath, columns=col_names) + out = pq.read_table(dirpath, columns=col_names) expected = pa.Table.from_arrays([result.column(i) for i 
in to_read], names=col_names, metadata=result.schema.metadata) assert out.equals(expected) # Read with multiple threads - pa.localfs.read_parquet(dirpath, use_threads=True) + pq.read_table(dirpath, use_threads=True) # Test failure modes with non-uniform metadata bad_apple = _test_dataframe(size, seed=i).iloc[:, :4] @@ -2892,7 +2892,7 @@ def _test_write_to_dataset_no_partitions(base_path, output_table = pa.Table.from_pandas(output_df) if filesystem is None: - filesystem = LocalFileSystem.get_instance() + filesystem = LocalFileSystem._get_instance() # Without partitions, append files to root_path n = 5 @@ -3315,7 +3315,7 @@ def test_backwards_compatible_column_metadata_handling( # TODO(dataset) support pickling def _make_dataset_for_pickling(tempdir, N=100): path = tempdir / 'data.parquet' - fs = LocalFileSystem.get_instance() + fs = LocalFileSystem._get_instance() df = pd.DataFrame({ 'index': np.arange(N), @@ -3552,7 +3552,7 @@ def test_parquet_file_pass_directory_instead_of_file(tempdir): @pytest.mark.pandas @pytest.mark.parametrize("filesystem", [ None, - LocalFileSystem.get_instance(), + LocalFileSystem._get_instance(), fs.LocalFileSystem(), ]) def test_parquet_writer_filesystem_local(tempdir, filesystem): diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 690ba3f1fc1..c11fe26a9a2 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -52,7 +52,8 @@ class _DeprecatedMeta(type): def __instancecheck__(self, other): warnings.warn( msg.format(old_name, next_version, new_class.__name__), - FutureWarning + FutureWarning, + stacklevel=2 ) return isinstance(other, new_class) From 17883b50a5da2bc887ed473503185852995bfd5c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 9 Sep 2020 11:22:06 +0200 Subject: [PATCH 2/9] fixup test --- python/pyarrow/tests/test_parquet.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 48501e86274..b015d890d0d 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -2436,14 +2436,18 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): to_read = [0, 2, 6, result.num_columns - 1] col_names = [result.field(i).name for i in to_read] - out = pq.read_table(dirpath, columns=col_names) + out = pq.read_table( + dirpath, columns=col_names, use_legacy_dataset=use_legacy_dataset + ) expected = pa.Table.from_arrays([result.column(i) for i in to_read], names=col_names, metadata=result.schema.metadata) assert out.equals(expected) # Read with multiple threads - pq.read_table(dirpath, use_threads=True) + pq.read_table( + dirpath, use_threads=True, use_legacy_dataset=use_legacy_dataset + ) # Test failure modes with non-uniform metadata bad_apple = _test_dataframe(size, seed=i).iloc[:, :4] From 9b51079c392af7fd877d41a2747764337a96fa2e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 17 Sep 2020 14:30:14 +0200 Subject: [PATCH 3/9] fix test for older python versions --- python/pyarrow/tests/test_filesystem.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyarrow/tests/test_filesystem.py b/python/pyarrow/tests/test_filesystem.py index 29388953c41..5acd806dc5e 100644 --- a/python/pyarrow/tests/test_filesystem.py +++ b/python/pyarrow/tests/test_filesystem.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. 
+import sys + import pyarrow as pa from pyarrow import filesystem @@ -28,6 +30,11 @@ def test_filesystem_deprecated(): with pytest.warns(DeprecationWarning): filesystem.LocalFileSystem.get_instance() + +@pytest.mark.skipif(sys.version_info < (3, 7), + reason="getattr needs Python 3.7") +def test_filesystem_deprecated_toplevel(): + with pytest.warns(DeprecationWarning): pa.localfs From 9da29c513c25ec2429fc4fe9ef510b9d005b1021 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 17 Sep 2020 15:43:31 +0200 Subject: [PATCH 4/9] some changes to accomodate fsspec filesystems --- python/pyarrow/filesystem.py | 13 ++++++++++++- python/pyarrow/parquet.py | 2 +- python/pyarrow/tests/test_parquet.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index c36dc115e41..994f440e653 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -448,7 +448,18 @@ def _ensure_filesystem(fs): # In case its a simple LocalFileSystem (e.g. dask) use native arrow # FS elif mro.__name__ == 'LocalFileSystem': - return LocalFileSystem.get_instance() + return LocalFileSystem._get_instance() + + try: + import fsspec + except ImportError: + pass + else: + if isinstance(fs, fsspec.AbstractFileSystem): + # for recent fsspec versions that stop inheriting from + # pyarrow.filesystem.FileSystem, still allow fsspec + # filesystems (which should be compatible with our legacy fs) + return fs raise OSError('Unrecognized filesystem: {}'.format(fs_type)) else: diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py index b6dc6a8bd3b..14d77fe0d21 100644 --- a/python/pyarrow/parquet.py +++ b/python/pyarrow/parquet.py @@ -1348,7 +1348,7 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1, if _is_path_like(path_or_paths) and fs.isdir(path_or_paths): manifest = ParquetManifest(path_or_paths, filesystem=fs, open_file_func=open_file_func, - pathsep=fs.pathsep, + pathsep=getattr(fs, "pathsep", "/"), metadata_nthreads=metadata_nthreads) common_metadata_path = manifest.common_metadata_path metadata_path = manifest.metadata_path diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index b015d890d0d..36e0ff67cec 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -499,7 +499,7 @@ def test_multiple_path_types(tempdir, use_legacy_dataset): @parametrize_legacy_dataset @pytest.mark.parametrize("filesystem", [ - None, fs.LocalFileSystem(), LocalFileSystem.get_instance() + None, fs.LocalFileSystem(), LocalFileSystem._get_instance() ]) def test_relative_paths(tempdir, use_legacy_dataset, filesystem): # reading and writing from relative paths From 192f754cb1662d4a22a0202a28ee99718b3f47be Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 30 Sep 2020 18:27:06 +0200 Subject: [PATCH 5/9] also deprecate pyarrow.HadoopFileSystem --- docs/source/python/filesystems_deprecated.rst | 6 +++--- python/pyarrow/__init__.py | 7 ++++++- python/pyarrow/hdfs.py | 13 ++++++++++--- python/pyarrow/tests/test_filesystem.py | 3 +++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/docs/source/python/filesystems_deprecated.rst b/docs/source/python/filesystems_deprecated.rst index 51a07d5efa6..04887e97738 100644 --- a/docs/source/python/filesystems_deprecated.rst +++ b/docs/source/python/filesystems_deprecated.rst @@ -18,9 +18,9 @@ Filesystem Interface (legacy) ============================= -.. 
note:: - This section documents the deprecated filesystem layer. It is highly - recommended to use the :ref:`new filesystem layer ` instead. +.. warning:: + This section documents the deprecated filesystem layer. You should + use the :ref:`new filesystem layer ` instead. .. _hdfs: diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index a229d798eb2..5d985d5cdf6 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -191,7 +191,6 @@ def show_versions(): SerializationCallbackError, DeserializationCallbackError) -from pyarrow.hdfs import HadoopFileSystem import pyarrow.hdfs as hdfs from pyarrow.ipc import serialize_pandas, deserialize_pandas @@ -207,6 +206,7 @@ def show_versions(): # deprecated filesystems from pyarrow.filesystem import FileSystem as _FileSystem, LocalFileSystem as _LocalFileSystem +from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem _localfs = _LocalFileSystem._get_instance() @@ -228,6 +228,10 @@ def __getattr__(name): _warnings.warn(_msg.format("LocalFileSystem", "LocalFileSystem"), DeprecationWarning, stacklevel=2) return _LocalFileSystem + elif name == "HadoopFileSystem": + _warnings.warn(_msg.format("HadoopFileSystem", "HadoopFileSystem"), + DeprecationWarning, stacklevel=2) + return _HadoopFileSystem raise AttributeError( "module 'pyarrow' has no attribute '{0}'".format(name) @@ -236,6 +240,7 @@ def __getattr__(name): localfs = _localfs FileSystem = _FileSystem LocalFileSystem = _LocalFileSystem + HadoopFileSystem = _HadoopFileSystem # Entry point for starting the plasma store diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py index f4beec37387..4e764228e3c 100644 --- a/python/pyarrow/hdfs.py +++ b/python/pyarrow/hdfs.py @@ -19,6 +19,7 @@ import os import posixpath import sys +import warnings from pyarrow.util import implements from pyarrow.filesystem import FileSystem @@ -34,6 +35,10 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem): def __init__(self, host="default", port=0, user=None, kerb_ticket=None, driver='libhdfs', extra_conf=None): + warnings.warn( + "pyarrow.hdfs.HadoopFileSystem is deprecated as of 2.0.0, " + "please use pyarrow.fs.HadoopFileSystem instead", + DeprecationWarning, stacklevel=2) if driver == 'libhdfs': _maybe_set_hadoop_classpath() @@ -205,7 +210,9 @@ def connect(host="default", port=0, user=None, kerb_ticket=None, ------- filesystem : HadoopFileSystem """ - fs = HadoopFileSystem(host=host, port=port, user=user, - kerb_ticket=kerb_ticket, - extra_conf=extra_conf) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + fs = HadoopFileSystem(host=host, port=port, user=user, + kerb_ticket=kerb_ticket, + extra_conf=extra_conf) return fs diff --git a/python/pyarrow/tests/test_filesystem.py b/python/pyarrow/tests/test_filesystem.py index 5acd806dc5e..b859a4353ee 100644 --- a/python/pyarrow/tests/test_filesystem.py +++ b/python/pyarrow/tests/test_filesystem.py @@ -44,6 +44,9 @@ def test_filesystem_deprecated_toplevel(): with pytest.warns(DeprecationWarning): pa.LocalFileSystem + with pytest.warns(DeprecationWarning): + pa.HadoopFileSystem + def test_resolve_uri(): uri = "file:///home/user/myfile.parquet" From dfdc62246bb4bb450bbbaba2b4247d3edeeb2264 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 1 Oct 2020 10:37:40 +0200 Subject: [PATCH 6/9] deprecate connect --- python/pyarrow/filesystem.py | 2 +- python/pyarrow/hdfs.py | 15 ++++++++++++++- python/pyarrow/tests/test_hdfs.py | 3 ++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git 
a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index 994f440e653..787f7255d44 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -504,7 +504,7 @@ def resolve_filesystem_and_path(where, filesystem=None): port = 0 if len(netloc_split) == 2 and netloc_split[1].isnumeric(): port = int(netloc_split[1]) - fs = pa.hdfs.connect(host=host, port=port) + fs = pa.hdfs._connect(host=host, port=port) fs_path = parsed_uri.path elif parsed_uri.scheme == 'file': # Input is local URI such as file:///home/user/myfile.parquet diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py index 4e764228e3c..4601de453fe 100644 --- a/python/pyarrow/hdfs.py +++ b/python/pyarrow/hdfs.py @@ -36,7 +36,7 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem): def __init__(self, host="default", port=0, user=None, kerb_ticket=None, driver='libhdfs', extra_conf=None): warnings.warn( - "pyarrow.hdfs.HadoopFileSystem is deprecated as of 2.0.0, " + "'pyarrow.hdfs.HadoopFileSystem' is deprecated as of 2.0.0, " "please use pyarrow.fs.HadoopFileSystem instead", DeprecationWarning, stacklevel=2) if driver == 'libhdfs': @@ -210,6 +210,19 @@ def connect(host="default", port=0, user=None, kerb_ticket=None, ------- filesystem : HadoopFileSystem """ + warnings.warn( + "'pyarrow.hdfs.connect' is deprecated as of 2.0.0, " + "please use pyarrow.fs.HadoopFileSystem instead", + DeprecationWarning, stacklevel=2 + ) + return _connect( + host=host, port=port, user=user, kerb_ticket=kerb_ticket, + extra_conf=extra_conf + ) + + +def _connect(host="default", port=0, user=None, kerb_ticket=None, + extra_conf=None): with warnings.catch_warnings(): warnings.simplefilter("ignore") fs = HadoopFileSystem(host=host, port=port, user=user, diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py index 2110be59069..c048f6557ae 100644 --- a/python/pyarrow/tests/test_hdfs.py +++ b/python/pyarrow/tests/test_hdfs.py @@ -46,7 +46,8 @@ def hdfs_test_client(): raise ValueError('Env variable ARROW_HDFS_TEST_PORT was not ' 'an integer') - return pa.hdfs.connect(host, port, user) + with pytest.warns(DeprecationWarning): + return pa.hdfs.connect(host, port, user) @pytest.mark.hdfs From a2ccf4403690170f91719bf779e709c864c1f072 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Oct 2020 15:40:34 +0200 Subject: [PATCH 7/9] simplified logic in __init__.py --- python/pyarrow/__init__.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 5d985d5cdf6..9f544a12184 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -213,25 +213,20 @@ def show_versions(): _msg = "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead." 
+_deprecated = { + "localfs": (_localfs, "LocalFileSystem"), + "FileSystem": (_FileSystem, "FileSystem"), + "LocalFileSystem": (_LocalFileSystem, "LocalFileSystem"), + "HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"), +} if _sys.version_info >= (3, 7): def __getattr__(name): - if name == "localfs": - _warnings.warn(_msg.format("localfs", "LocalFileSystem"), + if name in _deprecated: + obj, new_name = _deprecated[name] + _warnings.warn(_msg.format(name, new_name), DeprecationWarning, stacklevel=2) - return _localfs - elif name == "FileSystem": - _warnings.warn(_msg.format("FileSystem", "FileSystem"), - DeprecationWarning, stacklevel=2) - return _FileSystem - elif name == "LocalFileSystem": - _warnings.warn(_msg.format("LocalFileSystem", "LocalFileSystem"), - DeprecationWarning, stacklevel=2) - return _LocalFileSystem - elif name == "HadoopFileSystem": - _warnings.warn(_msg.format("HadoopFileSystem", "HadoopFileSystem"), - DeprecationWarning, stacklevel=2) - return _HadoopFileSystem + return obj raise AttributeError( "module 'pyarrow' has no attribute '{0}'".format(name) From 642c1d644084eb4d0a3e55d649fe60739a28a81d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Oct 2020 15:57:25 +0200 Subject: [PATCH 8/9] centralize deprecation message template --- python/pyarrow/filesystem.py | 17 ++++++++--------- python/pyarrow/hdfs.py | 9 ++++----- python/pyarrow/util.py | 12 +++++++----- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index 787f7255d44..6bb4fa14146 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -25,7 +25,12 @@ from os.path import join as pjoin import pyarrow as pa -from pyarrow.util import implements, _stringify_path, _is_path_like +from pyarrow.util import implements, _stringify_path, _is_path_like, _DEPR_MSG + + +_FS_DEPR_MSG = _DEPR_MSG.format( + "filesystem.LocalFileSystem", "2.0.0", "fs.LocalFileSystem" +) class FileSystem: @@ -239,10 +244,7 @@ class LocalFileSystem(FileSystem): _instance = None def __init__(self): - warnings.warn( - "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " - "please use pyarrow.fs.LocalFileSystem instead", - DeprecationWarning, stacklevel=2) + warnings.warn(_FS_DEPR_MSG, DeprecationWarning, stacklevel=2) super().__init__() @classmethod @@ -254,10 +256,7 @@ def _get_instance(cls): @classmethod def get_instance(cls): - warnings.warn( - "pyarrow.filesystem.LocalFileSystem is deprecated as of 2.0.0, " - "please use pyarrow.fs.LocalFileSystem instead", - DeprecationWarning, stacklevel=2) + warnings.warn(_FS_DEPR_MSG, DeprecationWarning, stacklevel=2) return cls._get_instance() @implements(FileSystem.ls) diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py index 4601de453fe..eb1b019bf91 100644 --- a/python/pyarrow/hdfs.py +++ b/python/pyarrow/hdfs.py @@ -21,7 +21,7 @@ import sys import warnings -from pyarrow.util import implements +from pyarrow.util import implements, _DEPR_MSG from pyarrow.filesystem import FileSystem import pyarrow.lib as lib @@ -36,8 +36,8 @@ class HadoopFileSystem(lib.HadoopFileSystem, FileSystem): def __init__(self, host="default", port=0, user=None, kerb_ticket=None, driver='libhdfs', extra_conf=None): warnings.warn( - "'pyarrow.hdfs.HadoopFileSystem' is deprecated as of 2.0.0, " - "please use pyarrow.fs.HadoopFileSystem instead", + _DEPR_MSG.format( + "hdfs.HadoopFileSystem", "2.0.0", "fs.HadoopFileSystem"), DeprecationWarning, stacklevel=2) if driver == 'libhdfs': 
_maybe_set_hadoop_classpath() @@ -211,8 +211,7 @@ def connect(host="default", port=0, user=None, kerb_ticket=None, filesystem : HadoopFileSystem """ warnings.warn( - "'pyarrow.hdfs.connect' is deprecated as of 2.0.0, " - "please use pyarrow.fs.HadoopFileSystem instead", + _DEPR_MSG.format("hdfs.connect", "2.0.0", "fs.HadoopFileSystem"), DeprecationWarning, stacklevel=2 ) return _connect( diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index c11fe26a9a2..e91294a3a1b 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -24,6 +24,11 @@ import warnings +_DEPR_MSG = ( + "pyarrow.{} is deprecated as of {}, please use pyarrow.{} instead." +) + + def implements(f): def decorator(g): g.__doc__ = f.__doc__ @@ -32,8 +37,7 @@ def decorator(g): def _deprecate_api(old_name, new_name, api, next_version): - msg = ('pyarrow.{} is deprecated as of {}, please use pyarrow.{} instead' - .format(old_name, next_version, new_name)) + msg = _DEPR_MSG.format(old_name, next_version, new_name) def wrapper(*args, **kwargs): warnings.warn(msg, FutureWarning) @@ -46,12 +50,10 @@ def _deprecate_class(old_name, new_class, next_version, """ Raise warning if a deprecated class is used in an isinstance check. """ - msg = 'pyarrow.{} is deprecated as of {}, please use pyarrow.{} instead' - class _DeprecatedMeta(type): def __instancecheck__(self, other): warnings.warn( - msg.format(old_name, next_version, new_class.__name__), + _DEPR_MSG.format(old_name, next_version, new_class.__name__), FutureWarning, stacklevel=2 ) From 122b5c8ea4b90108f2308b022bf877c13e52f108 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Oct 2020 16:51:40 +0200 Subject: [PATCH 9/9] check for fsspec in sys.modules --- python/pyarrow/filesystem.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index 6bb4fa14146..0831adbd3e7 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -19,6 +19,7 @@ import os import inspect import posixpath +import sys import urllib.parse import warnings @@ -449,11 +450,8 @@ def _ensure_filesystem(fs): elif mro.__name__ == 'LocalFileSystem': return LocalFileSystem._get_instance() - try: - import fsspec - except ImportError: - pass - else: + if "fsspec" in sys.modules: + fsspec = sys.modules["fsspec"] if isinstance(fs, fsspec.AbstractFileSystem): # for recent fsspec versions that stop inheriting from # pyarrow.filesystem.FileSystem, still allow fsspec
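
For downstream users hit by these deprecation warnings, a minimal migration sketch (not part of the patch series itself; it only uses pyarrow.fs and pyarrow.parquet APIs available as of pyarrow 2.0, and the dataset path is hypothetical):

# Before (deprecated by this series):
#     from pyarrow.filesystem import LocalFileSystem
#     fs = LocalFileSystem.get_instance()
#     hdfs = pa.hdfs.connect(host, port)

# After: use the new pyarrow.fs layer instead.
from pyarrow import fs
import pyarrow.parquet as pq

local = fs.LocalFileSystem()

# Directory listing goes through FileSelector + get_file_info rather than ls().
infos = local.get_file_info(
    fs.FileSelector("/tmp/example_dataset", recursive=True))

# Parquet reads accept the new filesystems directly.
table = pq.read_table("/tmp/example_dataset", filesystem=local)

# HDFS access moves from pa.hdfs.connect(host, port) to
# fs.HadoopFileSystem(host, port).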