From 64c0d83abfdb7969d136a5820d0c9a18b28a842f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 24 May 2023 08:39:43 +0200 Subject: [PATCH 1/3] GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database --- python/pyarrow/tests/test_scalars.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index ca2d29e5dac..30c4f5a4147 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -19,6 +19,7 @@ import decimal import pickle import pytest +import sys import weakref import numpy as np @@ -295,6 +296,9 @@ def test_cast(): pa.scalar('foo').cast('int32') +# TODO: We should test on windows once ARROW-13168 is resolved. +@pytest.mark.skipif(sys.platform == 'win32', + reason="Timezone database is not available on Windows yet") def test_cast_timestamp_to_string(): # GH-35370 pytest.importorskip("pytz") From 27a91154cdafc4ea8bd784abcfdd93695b98bea7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 31 May 2023 11:51:45 +0200 Subject: [PATCH 2/3] actually check if tz database is available or not on Windows --- python/pyarrow/tests/test_compute.py | 36 ++++++++++++++-------------- python/pyarrow/tests/test_scalars.py | 6 ++--- python/pyarrow/tests/util.py | 9 +++++++ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 875d0e613b6..ad0e71e9532 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -36,6 +36,8 @@ import pyarrow as pa import pyarrow.compute as pc from pyarrow.lib import ArrowNotImplementedError +from pyarrow.tests import util + all_array_types = [ ('bool', [True, False, False, True, True]), @@ -180,17 +182,19 @@ def test_option_class_equality(): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), ] - # 
TODO: We should test on windows once ARROW-13168 is resolved. - # Timezone database is not available on Windows yet - if sys.platform != 'win32': + # Timezone database might not be installed on Windows + if sys.platform != "win32" or util.windows_has_tzdata(): options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana")) classes = {type(option) for option in options} for cls in exported_option_classes: - # Timezone database is not available on Windows yet - if cls not in classes and sys.platform != 'win32' and \ - cls != pc.AssumeTimezoneOptions: + # Timezone database might not be installed on Windows + if ( + cls not in classes + and (sys.platform != "win32" or util.windows_has_tzdata()) + and cls != pc.AssumeTimezoneOptions + ): try: options.append(cls()) except TypeError: @@ -1846,10 +1850,9 @@ def test_strptime(): assert got == pa.array([None, None, None], type=pa.timestamp('s')) -# TODO: We should test on windows once ARROW-13168 is resolved. @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def test_strftime(): times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] @@ -2029,18 +2032,16 @@ def test_extract_datetime_components(): _check_datetime_components(timestamps) # Test timezone aware timestamp array - if sys.platform == 'win32': - # TODO: We should test on windows once ARROW-13168 is resolved. - pytest.skip('Timezone database is not available on Windows yet') + if sys.platform == "win32" and not util.windows_has_tzdata(): + pytest.skip('Timezone database is not installed on Windows') else: for timezone in timezones: _check_datetime_components(timestamps, timezone) -# TODO: We should test on windows once ARROW-13168 is resolved. 
@pytest.mark.pandas -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def test_assume_timezone(): ts_type = pa.timestamp("ns") timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789", @@ -2235,9 +2236,8 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) -# TODO: We should test on windows once ARROW-13168 is resolved. -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) @pytest.mark.pandas diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 30c4f5a4147..712221feaf2 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -26,6 +26,7 @@ import pyarrow as pa import pyarrow.compute as pc +from pyarrow.tests import util @pytest.mark.parametrize(['value', 'ty', 'klass'], [ @@ -296,9 +297,8 @@ def test_cast(): pa.scalar('foo').cast('int32') -# TODO: We should test on windows once ARROW-13168 is resolved. 
-@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def test_cast_timestamp_to_string(): # GH-35370 pytest.importorskip("pytz") diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index df7936371ee..0b69deb73ba 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -448,3 +448,12 @@ def _configure_s3_limited_user(s3_server, policy): except FileNotFoundError: pytest.skip("Configuring limited s3 user failed") + + +def windows_has_tzdata(): + """ + Return whether tzdata exists in the default location where tz.cpp looks + for it (until we make this configurable at run-time). + """ + tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") + return os.path.exists(tzdata_path) From 1028a029cad98be386d0f2f394e4744b517a77f2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 31 May 2023 14:09:52 +0200 Subject: [PATCH 3/3] don't test locale specific ones on windows --- python/pyarrow/tests/test_compute.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ad0e71e9532..f934edd3c3b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1857,9 +1857,11 @@ def test_strftime(): times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] - formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", - "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x", - "%X", "%%", "%G", "%V", "%u"] + formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I", + "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%%", "%G", "%V", "%u"] + if sys.platform != "win32": + # Locale-dependent formats don't match on Windows + formats.extend(["%c", "%x", 
"%X"]) for timezone in timezones: ts = pd.to_datetime(times).tz_localize(timezone)