@@ -124,6 +124,21 @@ def df_full():
124124 )
125125
126126
@pytest.fixture(
    params=[
        # "now" in UTC and at the two extreme offsets the stdlib exposes
        datetime.datetime.now(tz=datetime.timezone.utc),
        datetime.datetime.now(tz=datetime.timezone.min),
        datetime.datetime.now(tz=datetime.timezone.max),
        # fixed wall-clock time parsed with positive/negative offsets,
        # both whole-hour and quarter-hour
        *(
            datetime.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
            for stamp in (
                "2019-01-04T16:41:24+0200",
                "2019-01-04T16:41:24+0215",
                "2019-01-04T16:41:24-0200",
                "2019-01-04T16:41:24-0215",
            )
        ),
    ]
)
def timezone_aware_date_list(request):
    """
    Yield one timezone-aware ``datetime.datetime`` per parametrization.

    Despite the name, each run receives a single datetime object (the
    current parameter), not a list; the tests using it build their own list.
    """
    return request.param
140+
141+
127142def check_round_trip (
128143 df ,
129144 engine = None ,
@@ -133,6 +148,7 @@ def check_round_trip(
133148 expected = None ,
134149 check_names = True ,
135150 check_like = False ,
151+ check_dtype = True ,
136152 repeat = 2 ,
137153):
138154 """Verify parquet serializer and deserializer produce the same results.
@@ -174,7 +190,11 @@ def compare(repeat):
174190 actual = read_parquet (path , ** read_kwargs )
175191
176192 tm .assert_frame_equal (
177- expected , actual , check_names = check_names , check_like = check_like
193+ expected ,
194+ actual ,
195+ check_names = check_names ,
196+ check_like = check_like ,
197+ check_dtype = check_dtype ,
178198 )
179199
180200 if path is None :
@@ -724,6 +744,21 @@ def test_timestamp_nanoseconds(self, pa):
724744 df = pd .DataFrame ({"a" : pd .date_range ("2017-01-01" , freq = "1n" , periods = 10 )})
725745 check_round_trip (df , pa , write_kwargs = {"version" : "2.0" })
726746
def test_timezone_aware_index(self, pa, timezone_aware_date_list):
    """
    Round-trip a frame whose index and only column hold the same
    timezone-aware datetime through the ``pa`` engine.
    """
    stamps = [timezone_aware_date_list] * 5
    frame = pd.DataFrame(data={"index_as_col": stamps}, index=stamps)

    # see gh-36004
    # Only the time(zone) values are compared, not the tzinfo class:
    # pyarrow always creates fixed offset timezones using pytz.FixedOffset(),
    # even if the input used datetime.timezone().
    #
    # Technically the two are the same:
    #   - both implement datetime.tzinfo
    #   - both wrap datetime.timedelta()
    # This use-case sets the resolution to 1 minute.
    check_round_trip(frame, pa, check_dtype=False)
761+
727762 @td .skip_if_no ("pyarrow" , min_version = "0.17" )
728763 def test_filter_row_groups (self , pa ):
729764 # https://github.com/pandas-dev/pandas/issues/26551
@@ -862,3 +897,12 @@ def test_empty_dataframe(self, fp):
862897 expected = df .copy ()
863898 expected .index .name = "index"
864899 check_round_trip (df , fp , expected = expected )
900+
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
    """
    Round-trip a frame whose index and only column hold the same
    timezone-aware datetime through the ``fp`` engine.
    """
    stamps = [timezone_aware_date_list] * 5
    frame = pd.DataFrame(data={"index_as_col": stamps}, index=stamps)

    # The frame read back is expected to carry the index name "index",
    # so the comparison target is a copy of the input with that name set.
    want = frame.copy()
    want.index.name = "index"

    check_round_trip(frame, fp, expected=want)
0 commit comments