From 2a4bc8ec8ef4f4dd51955d15fb538d0a084e93b9 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 30 Mar 2021 17:30:43 +0200 Subject: [PATCH 1/5] ARROW-12068: [Python] Stop using distutils According to PEP 632, distutils will be deprecated in Python 3.10 and removed in 3.12. * switch to `setuptools` for general packaging * use the `Version` class from the `packaging` project instead of `distutils.LooseVersion` --- dev/tasks/conda-recipes/azure.clean.yml | 2 +- dev/tasks/conda-recipes/clean.py | 5 +- python/examples/plasma/sorting/setup.py | 2 +- python/pyarrow/feather.py | 3 +- python/pyarrow/pandas-shim.pxi | 13 +- python/pyarrow/tests/parquet/test_dataset.py | 4 +- python/pyarrow/tests/parquet/test_pandas.py | 4 +- python/pyarrow/tests/test_fs.py | 5 +- python/pyarrow/tests/test_pandas.py | 40 +- python/pyarrow/tests/test_schema.py | 5 +- python/pyarrow/vendored/__init__.py | 0 python/pyarrow/vendored/version.py | 545 +++++++++++++++++++ python/setup.py | 28 +- 13 files changed, 604 insertions(+), 52 deletions(-) create mode 100644 python/pyarrow/vendored/__init__.py create mode 100644 python/pyarrow/vendored/version.py diff --git a/dev/tasks/conda-recipes/azure.clean.yml b/dev/tasks/conda-recipes/azure.clean.yml index 32deccac0ba0..55ac36528add 100644 --- a/dev/tasks/conda-recipes/azure.clean.yml +++ b/dev/tasks/conda-recipes/azure.clean.yml @@ -13,7 +13,7 @@ jobs: displayName: Clone arrow - script: | - conda install -y -c conda-forge pandas anaconda-client + conda install -y -c conda-forge pandas anaconda-client packaging displayName: Install requirements - script: | diff --git a/dev/tasks/conda-recipes/clean.py b/dev/tasks/conda-recipes/clean.py index 3e77f0e7ac74..bd31c875dc38 100644 --- a/dev/tasks/conda-recipes/clean.py +++ b/dev/tasks/conda-recipes/clean.py @@ -1,4 +1,3 @@ -from distutils.version import LooseVersion from subprocess import check_output, check_call from typing import List @@ -7,6 +6,8 @@ import pandas as pd import sys +from 
packaging.version import Version + VERSIONS_TO_KEEP = 5 PACKAGES = [ @@ -44,7 +45,7 @@ def packages_to_delete(package_name: str, platform: str) -> List[str]: env=env, ) pkgs = pd.DataFrame(json.loads(pkgs_json)[package_name]) - pkgs["version"] = pkgs["version"].map(LooseVersion) + pkgs["version"] = pkgs["version"].map(Version) pkgs["py_version"] = pkgs["build"].str.slice(0, 4) to_delete = [] diff --git a/python/examples/plasma/sorting/setup.py b/python/examples/plasma/sorting/setup.py index a578085a8e4c..a5dfa5ae0a7a 100644 --- a/python/examples/plasma/sorting/setup.py +++ b/python/examples/plasma/sorting/setup.py @@ -16,7 +16,7 @@ # under the License. import numpy as np -from distutils.core import setup +from setuptools import setup from Cython.Build import cythonize setup( diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py index d42662bbf1a9..025c2330df55 100644 --- a/python/pyarrow/feather.py +++ b/python/pyarrow/feather.py @@ -22,10 +22,11 @@ from pyarrow.lib import (Codec, FeatherError, Table, # noqa concat_tables, schema) import pyarrow.lib as ext +from pyarrow.vendored.version import Version def _check_pandas_version(): - if _pandas_api.loose_version < '0.17.0': + if _pandas_api.loose_version < Version('0.17.0'): raise ImportError("feather requires pandas >= 0.17.0") diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi index edb7ff6c24ac..0e7cfe937428 100644 --- a/python/pyarrow/pandas-shim.pxi +++ b/python/pyarrow/pandas-shim.pxi @@ -53,12 +53,13 @@ cdef class _PandasAPIShim(object): else: return + from pyarrow.vendored.version import Version + self._pd = pd self._version = pd.__version__ - from distutils.version import LooseVersion - self._loose_version = LooseVersion(pd.__version__) + self._loose_version = Version(pd.__version__) - if self._loose_version < LooseVersion('0.23.0'): + if self._loose_version < Version('0.23.0'): self._have_pandas = False if raise_: raise ImportError( @@ -82,7 +83,7 @@ cdef class 
_PandasAPIShim(object): self._series, self._index, self._categorical_type, self._extension_array) self._extension_dtype = pd.api.extensions.ExtensionDtype - if self._loose_version >= LooseVersion('0.24.0'): + if self._loose_version >= Version('0.24.0'): self._is_extension_array_dtype = \ pd.api.types.is_extension_array_dtype else: @@ -92,12 +93,12 @@ cdef class _PandasAPIShim(object): self._datetimetz_type = pd.api.types.DatetimeTZDtype self._have_pandas = True - if self._loose_version > LooseVersion('0.25'): + if self._loose_version > Version('0.25'): self.has_sparse = False else: self.has_sparse = True - self._pd024 = self._loose_version >= LooseVersion('0.24') + self._pd024 = self._loose_version >= Version('0.24') cdef inline _check_import(self, bint raise_=True): if self._tried_importing_pandas: diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index a62ca2ed1a48..a0d417bf7fdf 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -17,7 +17,6 @@ import datetime import os -from distutils.version import LooseVersion import numpy as np import pytest @@ -30,6 +29,7 @@ parametrize_legacy_dataset, parametrize_legacy_dataset_fixed, parametrize_legacy_dataset_not_supported) from pyarrow.util import guid +from pyarrow.vendored.version import Version try: import pyarrow.parquet as pq @@ -633,7 +633,7 @@ def test_read_partitioned_directory_s3fs_wrapper( from pyarrow.filesystem import S3FSWrapper - if s3fs.__version__ >= LooseVersion("0.5"): + if Version(s3fs.__version__) >= Version("0.5"): pytest.skip("S3FSWrapper no longer working for s3fs 0.5+") fs, path = s3_example_s3fs diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 5be8a63fa7de..cf41117a3ef8 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -17,7 +17,6 @@ import io import json -from 
distutils.version import LooseVersion import numpy as np import pytest @@ -26,6 +25,7 @@ from pyarrow.tests.parquet.common import ( parametrize_legacy_dataset, parametrize_legacy_dataset_not_supported) from pyarrow.util import guid +from pyarrow.vendored.version import Version try: import pyarrow.parquet as pq @@ -559,7 +559,7 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes( tempdir, use_legacy_dataset ): # ARROW-8251 - preserve pandas extension dtypes in roundtrip - if LooseVersion(pd.__version__) < "1.0.0": + if Version(pd.__version__) < Version("1.0.0"): pytest.skip("__arrow_array__ added to pandas in 1.0.0") df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]}) diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 427e98874456..26322d7a8b64 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -16,7 +16,6 @@ # under the License. from datetime import datetime, timezone, timedelta -from distutils.version import LooseVersion import gzip import os import pathlib @@ -28,6 +27,8 @@ import pyarrow as pa from pyarrow.tests.test_io import assert_file_not_found +from pyarrow.vendored.version import Version + from pyarrow.fs import (FileType, FileInfo, FileSelector, FileSystem, LocalFileSystem, SubTreeFileSystem, _MockFileSystem, FileSystemHandler, PyFileSystem, FSSpecHandler) @@ -355,7 +356,7 @@ def py_fsspec_memoryfs(request, tempdir): @pytest.fixture def py_fsspec_s3fs(request, s3_connection, s3_server): s3fs = pytest.importorskip("s3fs") - if sys.version_info < (3, 7) and s3fs.__version__ >= LooseVersion("0.5"): + if sys.version_info < (3, 7) and s3fs.__version__ >= Version("0.5"): pytest.skip("s3fs>=0.5 version is async and requires Python >= 3.7") host, port, access_key, secret_key = s3_connection diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 961f87a33ee1..77c18b839c66 100644 --- a/python/pyarrow/tests/test_pandas.py +++ 
b/python/pyarrow/tests/test_pandas.py @@ -23,7 +23,6 @@ from collections import OrderedDict from datetime import date, datetime, time, timedelta, timezone -from distutils.version import LooseVersion import hypothesis as h import hypothesis.extra.pytz as tzst @@ -36,6 +35,7 @@ from pyarrow.pandas_compat import get_logical_type, _pandas_api from pyarrow.tests.util import invoke_script, random_ascii, rands import pyarrow.tests.strategies as past +from pyarrow.vendored.version import Version import pyarrow as pa try: @@ -1042,7 +1042,7 @@ def test_python_datetime_with_pytz_timezone(self, tz): def test_python_datetime_with_timezone_tzinfo(self): from datetime import timezone - if LooseVersion(pd.__version__) > "0.25.0": + if Version(pd.__version__) > Version("0.25.0"): # older pandas versions fail on datetime.timezone.utc (as in input) # vs pytz.UTC (as in result) values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)] @@ -1467,8 +1467,9 @@ def test_array_from_pandas_date_with_mask(self): expected = pd.Series([None, date(1991, 1, 1), None]) assert pa.Array.from_pandas(expected).equals(result) - @pytest.mark.skipif('1.16.0' <= LooseVersion(np.__version__) < '1.16.1', - reason='Until numpy/numpy#12745 is resolved') + @pytest.mark.skipif( + Version('1.16.0') <= Version(np.__version__) < Version('1.16.1'), + reason='Until numpy/numpy#12745 is resolved') def test_fixed_offset_timezone(self): df = pd.DataFrame({ 'a': [ @@ -2827,7 +2828,7 @@ def _fully_loaded_dataframe_example(): 9: pd.period_range('2013', periods=10, freq='M') } - if LooseVersion(pd.__version__) >= '0.21': + if Version(pd.__version__) >= Version('0.21'): # There is an issue with pickling IntervalIndex in pandas 0.20.x data[10] = pd.interval_range(start=1, freq=1, periods=10) @@ -2859,8 +2860,9 @@ def _check_serialize_components_roundtrip(pd_obj): tm.assert_series_equal(pd_obj, deserialized) -@pytest.mark.skipif('1.16.0' <= LooseVersion(np.__version__) < '1.16.1', - reason='Until numpy/numpy#12745 is 
resolved') +@pytest.mark.skipif( + Version('1.16.0') <= Version(np.__version__) < Version('1.16.1'), + reason='Until numpy/numpy#12745 is resolved') def test_serialize_deserialize_pandas(): # ARROW-1784, serialize and deserialize DataFrame by decomposing # BlockManager @@ -2908,7 +2910,7 @@ class A: pa.Table.from_pandas(df) # period unsupported for pandas <= 0.25 - if LooseVersion(pd.__version__) <= '0.25': + if Version(pd.__version__) <= Version('0.25'): df = pd.DataFrame({ 'a': pd.period_range('2000-01-01', periods=20), }) @@ -3817,12 +3819,12 @@ def test_dictionary_from_pandas_specified_type(): def test_array_protocol(): - if LooseVersion(pd.__version__) < '0.24.0': + if Version(pd.__version__) < Version('0.24.0'): pytest.skip('IntegerArray only introduced in 0.24') df = pd.DataFrame({'a': pd.Series([1, 2, None], dtype='Int64')}) - if LooseVersion(pd.__version__) < '0.26.0.dev': + if Version(pd.__version__) < Version('0.26.0.dev'): # with pandas<=0.25, trying to convert nullable integer errors with pytest.raises(TypeError): pa.table(df) @@ -3872,7 +3874,7 @@ def PandasArray__arrow_array__(self, type=None): def test_array_protocol_pandas_extension_types(monkeypatch): # ARROW-7022 - ensure protocol works for Period / Interval extension dtypes - if LooseVersion(pd.__version__) < '0.24.0': + if Version(pd.__version__) < Version('0.24.0'): pytest.skip('Period/IntervalArray only introduced in 0.24') storage = pa.array([1, 2, 3], type=pa.int64()) @@ -3921,7 +3923,7 @@ def _Int64Dtype__from_arrow__(self, array): def test_convert_to_extension_array(monkeypatch): - if LooseVersion(pd.__version__) < "0.26.0.dev": + if Version(pd.__version__) < Version("0.26.0.dev"): pytest.skip("Conversion from IntegerArray to arrow not yet supported") import pandas.core.internals as _int @@ -3949,7 +3951,7 @@ def test_convert_to_extension_array(monkeypatch): tm.assert_frame_equal(result, df2) # monkeypatch pandas Int64Dtype to *not* have the protocol method - if 
LooseVersion(pd.__version__) < "1.3.0.dev": + if Version(pd.__version__) < Version("1.3.0.dev"): monkeypatch.delattr( pd.core.arrays.integer._IntegerDtype, "__from_arrow__") else: @@ -3977,14 +3979,14 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # converting extension type to linked pandas ExtensionDtype/Array import pandas.core.internals as _int - if LooseVersion(pd.__version__) < "0.24.0": + if Version(pd.__version__) < Version("0.24.0"): pytest.skip("ExtensionDtype introduced in pandas 0.24") storage = pa.array([1, 2, 3, 4], pa.int64()) arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage) table = pa.table({'a': arr}) - if LooseVersion(pd.__version__) < "0.26.0.dev": + if Version(pd.__version__) < Version("0.26.0.dev"): # ensure pandas Int64Dtype has the protocol method (for older pandas) monkeypatch.setattr( pd.Int64Dtype, '__from_arrow__', _Int64Dtype__from_arrow__, @@ -4004,9 +4006,9 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method # (remove the version added above and the actual version for recent pandas) - if LooseVersion(pd.__version__) < "0.26.0.dev": + if Version(pd.__version__) < Version("0.26.0.dev"): monkeypatch.delattr(pd.Int64Dtype, "__from_arrow__") - elif LooseVersion(pd.__version__) < "1.3.0.dev": + elif Version(pd.__version__) < Version("1.3.0.dev"): monkeypatch.delattr( pd.core.arrays.integer._IntegerDtype, "__from_arrow__") else: @@ -4023,7 +4025,7 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): def test_to_pandas_extension_dtypes_mapping(): - if LooseVersion(pd.__version__) < "0.26.0.dev": + if Version(pd.__version__) < Version("0.26.0.dev"): pytest.skip("Conversion to pandas IntegerArray not yet supported") table = pa.table({'a': pa.array([1, 2, 3], pa.int64())}) @@ -4051,7 +4053,7 @@ def test_to_pandas_extension_dtypes_mapping(): def test_array_to_pandas(): - if LooseVersion(pd.__version__) < 
"1.1": + if Version(pd.__version__) < Version("1.1"): pytest.skip("ExtensionDtype to_pandas method missing") for arr in [pd.period_range("2012-01-01", periods=3, freq="D").array, diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index 2dc732fac53d..e87f620070d8 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -20,13 +20,12 @@ import sys import weakref -from distutils.version import LooseVersion - import pytest import numpy as np import pyarrow as pa import pyarrow.tests.util as test_util +from pyarrow.vendored.version import Version def test_schema_constructor_errors(): @@ -656,7 +655,7 @@ def test_schema_from_pandas(): '2010-08-13T05:46:57.437699912' ], dtype='datetime64[ns]'), ] - if LooseVersion(pd.__version__) >= '1.0.0': + if Version(pd.__version__) >= Version('1.0.0'): inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype())) for data in inputs: df = pd.DataFrame({'a': data}) diff --git a/python/pyarrow/vendored/__init__.py b/python/pyarrow/vendored/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/pyarrow/vendored/version.py b/python/pyarrow/vendored/version.py new file mode 100644 index 000000000000..b74f1da97837 --- /dev/null +++ b/python/pyarrow/vendored/version.py @@ -0,0 +1,545 @@ +# Vendored from https://github.com/pypa/packaging, +# changeset b5878c977206f60302536db969a8cef420853ade + +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of the +# `packaging` repository for complete details. 
+
+import collections
+import itertools
+import re
+import warnings
+
+__all__ = ["parse", "Version", "LegacyVersion",
+           "InvalidVersion", "VERSION_PATTERN"]
+
+
+class InfinityType:
+    def __repr__(self):
+        return "Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return False
+
+    def __le__(self, other):
+        return False
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return True
+
+    def __ge__(self, other):
+        return True
+
+    def __neg__(self):
+        return NegativeInfinity
+
+
+Infinity = InfinityType()
+
+
+class NegativeInfinityType:
+    def __repr__(self):
+        return "-Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return True
+
+    def __le__(self, other):
+        return True
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return False
+
+    def __ge__(self, other):
+        return False
+
+    def __neg__(self):
+        return Infinity
+
+
+NegativeInfinity = NegativeInfinityType()
+
+
+_Version = collections.namedtuple(
+    "_Version", ["epoch", "release", "dev", "pre", "post", "local"]
+)
+
+
+def parse(version):
+    """
+    Parse the given version string and return either a :class:`Version` object
+    or a :class:`LegacyVersion` object depending on if the given version is
+    a valid PEP 440 version or a legacy version.
+    """
+    try:
+        return Version(version)
+    except InvalidVersion:
+        return LegacyVersion(version)
+
+
+class InvalidVersion(ValueError):
+    """
+    An invalid version was found, users should refer to PEP 440.
+    """
+
+
+class _BaseVersion:
+
+    def __hash__(self):
+        return hash(self._key)
+
+    # Please keep the duplicated `isinstance` check
+    # in the six comparisons hereunder
+    # unless you find a way to avoid adding overhead function calls.
+    def __lt__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key < other._key
+
+    def __le__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key <= other._key
+
+    def __eq__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key == other._key
+
+    def __ge__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key >= other._key
+
+    def __gt__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key > other._key
+
+    def __ne__(self, other):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return self._key != other._key
+
+
+class LegacyVersion(_BaseVersion):
+    def __init__(self, version):
+        self._version = str(version)
+        self._key = _legacy_cmpkey(self._version)
+
+        warnings.warn(
+            "Creating a LegacyVersion has been deprecated and will be "
+            "removed in the next major release",
+            DeprecationWarning,
+        )
+
+    def __str__(self):
+        return self._version
+
+    def __repr__(self):
+        return f"<LegacyVersion('{self}')>"
+
+    @property
+    def public(self):
+        return self._version
+
+    @property
+    def base_version(self):
+        return self._version
+
+    @property
+    def epoch(self):
+        return -1
+
+    @property
+    def release(self):
+        return None
+
+    @property
+    def pre(self):
+        return None
+
+    @property
+    def post(self):
+        return None
+
+    @property
+    def dev(self):
+        return None
+
+    @property
+    def local(self):
+        return None
+
+    @property
+    def is_prerelease(self):
+        return False
+
+    @property
+    def is_postrelease(self):
+        return False
+
+    @property
+    def is_devrelease(self):
+        return False
+
+
+_legacy_version_component_re = re.compile(
+    r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)
+
+_legacy_version_replacement_map = {
+    "pre": "c",
+    "preview": "c",
+    "-": "final-",
+    "rc": "c",
+    "dev": "@",
+}
+
+
+def _parse_version_parts(s):
+    for part in _legacy_version_component_re.split(s):
+        part = _legacy_version_replacement_map.get(part, part)
+
+        if not part or part == ".":
+            continue
+
+        if part[:1] in "0123456789":
+            # pad for numeric comparison
+            yield part.zfill(8)
+        else:
+            yield "*" + part
+
+    # ensure that alpha/beta/candidate are before final
+    yield "*final"
+
+
+def _legacy_cmpkey(version):
+
+    # We hardcode an epoch of -1 here. A PEP 440 version can only have an epoch
+    # greater than or equal to 0. This will effectively put the LegacyVersion,
+    # which uses the de facto standard originally implemented by setuptools,
+    # as before all PEP 440 versions.
+    epoch = -1
+
+    # This scheme is taken from pkg_resources.parse_version setuptools prior to
+    # its adoption of the packaging library.
+    parts = []
+    for part in _parse_version_parts(version.lower()):
+        if part.startswith("*"):
+            # remove "-" before a prerelease tag
+            if part < "*final":
+                while parts and parts[-1] == "*final-":
+                    parts.pop()
+
+            # remove trailing zeros from each series of numeric parts
+            while parts and parts[-1] == "00000000":
+                parts.pop()
+
+        parts.append(part)
+
+    return epoch, tuple(parts)
+
+
+# Deliberately not anchored to the start and end of the string, to make it
+# easier for 3rd party code to reuse
+VERSION_PATTERN = r"""
+    v?
+    (?:
+        (?:(?P<epoch>[0-9]+)!)?                           # epoch
+        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P<pre>                                          # pre-release
+            [-_\.]?
+            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P<pre_n>[0-9]+)?
+        )?
+        (?P<post>                                         # post release
+            (?:-(?P<post_n1>[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?P<post_l>post|rev|r)
+                [-_\.]?
+                (?P<post_n2>[0-9]+)?
+            )
+        )?
+        (?P<dev>                                          # dev release
+            [-_\.]?
+            (?P<dev_l>dev)
+            [-_\.]?
+            (?P<dev_n>[0-9]+)?
+        )?
+    )
+    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+    """PEP 440 version parsed from a string; ordered by its `_key` tuple."""
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN +
+                        r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    def __init__(self, version):
+        """Parse `version`; raise InvalidVersion if it does not match."""
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(
+                match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group(
+                    "post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(
+                match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self):
+        return f"<Version('{self}')>"
+
+    def __str__(self):
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self):
+        _epoch = self._version.epoch
+        return _epoch
+
+    @property
+    def release(self):
+        _release = self._version.release
+        return _release
+
+    @property
+    def pre(self):
+        _pre = self._version.pre
+        return _pre
+
+    @property
+    def post(self):
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self):
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self):
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self):
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self):
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self):
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self):
+        return self.post is not None
+
+    @property
+    def is_devrelease(self):
+        return self.dev is not None
+
+    @property
+    def major(self):
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self):
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self):
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(letter, number):
+    """Normalize a (letter, number) segment to a tuple, or return None."""
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local):
+    """
+    Turn a string like abc.1.twelve into ("abc", 1, "twelve"); None stays None.
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(epoch, release, pre, post, dev, local):
+    """Build the tuple used as the sort key for a parsed Version."""
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we reverse the list, drop all the now-leading
+    # zeros until we come to something non-zero, then re-reverse the rest
+    # back into the correct order, make it a tuple, and use that as the
+    # release part of our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0,
+                                          reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i)
+            for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/python/setup.py b/python/setup.py
index dbd67c7ede38..5e0d46441027 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -26,14 +26,11 @@
 import shlex
 import shutil
 import sys
+import sysconfig
 
 import pkg_resources
 from setuptools import setup, Extension, Distribution
 
-from distutils.command.clean import clean as _clean
-from distutils.util import strtobool
-from distutils import sysconfig
-
 from Cython.Distutils import build_ext as _build_ext
 import Cython
 
@@ -62,15 +59,21 @@ def changed_dir(dirname):
         os.chdir(oldcwd)
 
 
-class clean(_clean):
+def strtobool(val):
+    """Convert a string representation of truth to true (1) or false (0).
 
-    def run(self):
-        _clean.run(self)
-        for x in []:
-            try:
-                os.remove(x)
-            except OSError:
-                pass
+    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
+    'val' is anything else.
+    """
+    # Copied from distutils.util.strtobool (distutils is being removed)
+    val = val.lower()
+    if val in ('y', 'yes', 't', 'true', 'on', '1'):
+        return 1
+    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
+        return 0
+    else:
+        raise ValueError("invalid truth value %r" % (val,))
 
 
 class build_ext(_build_ext):
@@ -588,7 +591,6 @@ def has_ext_modules(foo):
     # Dummy extension to trigger build_ext
     ext_modules=[Extension('__dummy__', sources=[])],
     cmdclass={
-        'clean': clean,
         'build_ext': build_ext
     },
     entry_points={

From 30875b3f8845dd86a5860bd548940ebd390b3e0c Mon Sep 17 00:00:00 2001
From: Antoine Pitrou 
Date: Tue, 30 Mar 2021 18:07:14 +0200
Subject: [PATCH 2/5] Fix for test_fs.py

---
 python/pyarrow/tests/test_fs.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 26322d7a8b64..1beecc66b122 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -356,7 +356,8 @@ def py_fsspec_memoryfs(request, tempdir):
 @pytest.fixture
 def py_fsspec_s3fs(request, s3_connection, s3_server):
     s3fs = pytest.importorskip("s3fs")
-    if sys.version_info < (3, 7) and s3fs.__version__ >= Version("0.5"):
+    if (sys.version_info < (3, 7) and
+            Version(s3fs.__version__) >= Version("0.5")):
         pytest.skip("s3fs>=0.5 version is async and requires Python >= 3.7")
 
     host, port, access_key, secret_key = s3_connection

From 4bab6efd032d368fa2880a8a015b50393e455bd4 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou 
Date: Tue, 30 Mar 2021 18:07:32 +0200
Subject: [PATCH 3/5] Try random fix for AppVeyor

---
 python/setup.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 5e0d46441027..f96eaa02585b 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -453,12 +453,7 @@ def get_ext_built_api_header(self, name):
     def get_ext_built(self, name):
         if sys.platform == 'win32':
             head, tail = os.path.split(name)
-            # Visual Studio seems to differ from other generators in
-            # where it places output files.
-            if self.cmake_generator.startswith('Visual Studio'):
-                return pjoin(head, self.build_type, tail + ext_suffix)
-            else:
-                return pjoin(head, tail + ext_suffix)
+            return pjoin(head, self.build_type, tail + ext_suffix)
         else:
             return pjoin(self.build_type, name + ext_suffix)
 

From 31d99858b581aac7f6df9f321ffbe4fd9c9d2127 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou 
Date: Wed, 31 Mar 2021 14:44:07 +0200
Subject: [PATCH 4/5] Should fix AppVeyor failure and RAT checks

---
 dev/release/rat_exclude_files.txt   |  1 +
 python/pyarrow/vendored/__init__.py | 16 ++++++++++++++++
 python/setup.py                     | 18 ++++++++++++------
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 6eb4beedb75c..c2e22abd39c8 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -136,6 +136,7 @@ python/MANIFEST.in
 python/manylinux1/.dockerignore
 python/pyarrow/includes/__init__.pxd
 python/pyarrow/tests/__init__.py
+python/pyarrow/vendored/*
 python/requirements*.txt
 pax_global_header
 MANIFEST.in
diff --git a/python/pyarrow/vendored/__init__.py b/python/pyarrow/vendored/__init__.py
index e69de29bb2d1..13a83393a912 100644
--- a/python/pyarrow/vendored/__init__.py
+++ b/python/pyarrow/vendored/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/setup.py b/python/setup.py
index f96eaa02585b..a2abb0501778 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -26,7 +26,12 @@
 import shlex
 import shutil
 import sys
-import sysconfig
+
+if sys.version_info >= (3, 10):
+    import sysconfig
+else:
+    # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825)
+    from distutils import sysconfig
 
 import pkg_resources
 from setuptools import setup, Extension, Distribution
@@ -42,11 +47,7 @@
 
 setup_dir = os.path.abspath(os.path.dirname(__file__))
 
-
 ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
-if ext_suffix is None:
-    # https://bugs.python.org/issue19555
-    ext_suffix = sysconfig.get_config_var('SO')
 
 
 @contextlib.contextmanager
@@ -453,7 +454,12 @@ def get_ext_built_api_header(self, name):
     def get_ext_built(self, name):
         if sys.platform == 'win32':
             head, tail = os.path.split(name)
-            return pjoin(head, self.build_type, tail + ext_suffix)
+            # Visual Studio seems to differ from other generators in
+            # where it places output files.
+            if self.cmake_generator.startswith('Visual Studio'):
+                return pjoin(head, self.build_type, tail + ext_suffix)
+            else:
+                return pjoin(head, tail + ext_suffix)
         else:
             return pjoin(self.build_type, name + ext_suffix)
 

From 28a05bf6bce13b60fdc8cbddaa5042532132b291 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou 
Date: Thu, 1 Apr 2021 14:12:24 +0200
Subject: [PATCH 5/5] Add copyright note

---
 LICENSE.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/LICENSE.txt b/LICENSE.txt
index 1480c1401c06..4cec07fd0c99 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -2209,3 +2209,12 @@ The files in cpp/src/arrow/vendored/fast_float/ contain code from
 https://github.com/lemire/fast_float
 
 which is made available under the Apache License 2.0.
+
+--------------------------------------------------------------------------------
+
+The file python/pyarrow/vendored/version.py contains code from
+
+https://github.com/pypa/packaging/
+
+which is made available under both the Apache License 2.0 and the
+BSD 2-clause license.