1+ from string import ascii_letters as letters
2+
13import numpy as np
24import pytest
35
68import pandas ._testing as tm
79import pandas .core .common as com
810
# Shared ``match=`` pattern for the SettingWithCopyError assertions in this module.
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
12+
13+
def random_text(nobs=100):
    """
    Build a one-column DataFrame of random substrings of ``ascii_letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.

    Returns
    -------
    DataFrame
        A single column named "letters". Each value is ``letters[lo:hi]`` for
        a sorted pair of random indices, so a value may be the empty string
        when both indices coincide.
    """
    rows = []
    for _ in range(nobs):
        # Draw two cut points and order them so the slice is never reversed.
        bounds = np.random.randint(len(letters), size=2)
        bounds.sort()
        lo, hi = bounds

        rows.append([letters[lo:hi]])

    return DataFrame(rows, columns=["letters"])
23+
924
1025class TestCaching :
1126 def test_slice_consolidate_invalidate_item_cache (self ):
@@ -30,23 +45,24 @@ def test_slice_consolidate_invalidate_item_cache(self):
3045 df ._clear_item_cache ()
3146 tm .assert_almost_equal (df ["bb" ][0 ], 0.17 )
3247
33- def test_setitem_cache_updating (self ):
48+ @pytest .mark .parametrize ("do_ref" , [True , False ])
49+ def test_setitem_cache_updating (self , do_ref ):
3450 # GH 5424
3551 cont = ["one" , "two" , "three" , "four" , "five" , "six" , "seven" ]
3652
37- for do_ref in [True , False ]:
38- df = DataFrame ({"a" : cont , "b" : cont [3 :] + cont [:3 ], "c" : np .arange (7 )})
53+ df = DataFrame ({"a" : cont , "b" : cont [3 :] + cont [:3 ], "c" : np .arange (7 )})
3954
40- # ref the cache
41- if do_ref :
42- df .loc [0 , "c" ]
55+ # ref the cache
56+ if do_ref :
57+ df .loc [0 , "c" ]
4358
44- # set it
45- df .loc [7 , "c" ] = 1
59+ # set it
60+ df .loc [7 , "c" ] = 1
4661
47- assert df .loc [0 , "c" ] == 0.0
48- assert df .loc [7 , "c" ] == 1.0
62+ assert df .loc [0 , "c" ] == 0.0
63+ assert df .loc [7 , "c" ] == 1.0
4964
65+ def test_setitem_cache_updating_slices (self ):
5066 # GH 7084
5167 # not updating cache on series setting with slices
5268 expected = DataFrame (
@@ -146,6 +162,9 @@ def test_detect_chained_assignment(self):
146162 df ["A" ][1 ] = - 6
147163 tm .assert_frame_equal (df , expected )
148164
165+ @pytest .mark .arm_slow
166+ def test_detect_chained_assignment_raises (self ):
167+
149168 # test with the chaining
150169 df = DataFrame (
151170 {
@@ -155,7 +174,6 @@ def test_detect_chained_assignment(self):
155174 )
156175 assert df ._is_copy is None
157176
158- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
159177 with pytest .raises (com .SettingWithCopyError , match = msg ):
160178 df ["A" ][0 ] = - 5
161179
@@ -164,6 +182,9 @@ def test_detect_chained_assignment(self):
164182
165183 assert df ["A" ]._is_copy is None
166184
185+ @pytest .mark .arm_slow
186+ def test_detect_chained_assignment_fails (self ):
187+
167188 # Using a copy (the chain), fails
168189 df = DataFrame (
169190 {
@@ -175,6 +196,9 @@ def test_detect_chained_assignment(self):
175196 with pytest .raises (com .SettingWithCopyError , match = msg ):
176197 df .loc [0 ]["A" ] = - 5
177198
199+ @pytest .mark .arm_slow
200+ def test_detect_chained_assignment_doc_example (self ):
201+
178202 # Doc example
179203 df = DataFrame (
180204 {
@@ -188,6 +212,9 @@ def test_detect_chained_assignment(self):
188212 indexer = df .a .str .startswith ("o" )
189213 df [indexer ]["c" ] = 42
190214
215+ @pytest .mark .arm_slow
216+ def test_detect_chained_assignment_object_dtype (self ):
217+
191218 expected = DataFrame ({"A" : [111 , "bbb" , "ccc" ], "B" : [1 , 2 , 3 ]})
192219 df = DataFrame ({"A" : ["aaa" , "bbb" , "ccc" ], "B" : [1 , 2 , 3 ]})
193220
@@ -200,6 +227,9 @@ def test_detect_chained_assignment(self):
200227 df .loc [0 , "A" ] = 111
201228 tm .assert_frame_equal (df , expected )
202229
230+ @pytest .mark .arm_slow
231+ def test_detect_chained_assignment_is_copy_pickle (self ):
232+
203233 # gh-5475: Make sure that is_copy is picked up reconstruction
204234 df = DataFrame ({"A" : [1 , 2 ]})
205235 assert df ._is_copy is None
@@ -210,18 +240,10 @@ def test_detect_chained_assignment(self):
210240 df2 ["B" ] = df2 ["A" ]
211241 df2 ["B" ] = df2 ["A" ]
212242
213- # gh-5597: a spurious raise as we are setting the entire column here
214- from string import ascii_letters as letters
215-
216- def random_text (nobs = 100 ):
217- df = []
218- for i in range (nobs ):
219- idx = np .random .randint (len (letters ), size = 2 )
220- idx .sort ()
221-
222- df .append ([letters [idx [0 ] : idx [1 ]]])
243+ @pytest .mark .arm_slow
244+ def test_detect_chained_assignment_setting_entire_column (self ):
223245
224- return DataFrame ( df , columns = [ "letters" ])
246+ # gh-5597: a spurious raise as we are setting the entire column here
225247
226248 df = random_text (100000 )
227249
@@ -239,6 +261,9 @@ def random_text(nobs=100):
239261 assert df ._is_copy is None
240262 df ["letters" ] = df ["letters" ].apply (str .lower )
241263
264+ @pytest .mark .arm_slow
265+ def test_detect_chained_assignment_implicit_take (self ):
266+
242267 # Implicitly take
243268 df = random_text (100000 )
244269 indexer = df .letters .apply (lambda x : len (x ) > 10 )
@@ -247,6 +272,9 @@ def random_text(nobs=100):
247272 assert df ._is_copy is not None
248273 df ["letters" ] = df ["letters" ].apply (str .lower )
249274
275+ @pytest .mark .arm_slow
276+ def test_detect_chained_assignment_implicit_take2 (self ):
277+
250278 # Implicitly take 2
251279 df = random_text (100000 )
252280 indexer = df .letters .apply (lambda x : len (x ) > 10 )
@@ -261,20 +289,32 @@ def random_text(nobs=100):
261289 df ["letters" ] = df ["letters" ].apply (str .lower )
262290 assert df ._is_copy is None
263291
292+ @pytest .mark .arm_slow
293+ def test_detect_chained_assignment_str (self ):
294+
264295 df = random_text (100000 )
265296 indexer = df .letters .apply (lambda x : len (x ) > 10 )
266297 df .loc [indexer , "letters" ] = df .loc [indexer , "letters" ].apply (str .lower )
267298
299+ @pytest .mark .arm_slow
300+ def test_detect_chained_assignment_is_copy (self ):
301+
268302 # an identical take, so no copy
269303 df = DataFrame ({"a" : [1 ]}).dropna ()
270304 assert df ._is_copy is None
271305 df ["a" ] += 1
272306
307+ @pytest .mark .arm_slow
308+ def test_detect_chained_assignment_sorting (self ):
309+
273310 df = DataFrame (np .random .randn (10 , 4 ))
274- s = df .iloc [:, 0 ].sort_values ()
311+ ser = df .iloc [:, 0 ].sort_values ()
275312
276- tm .assert_series_equal (s , df .iloc [:, 0 ].sort_values ())
277- tm .assert_series_equal (s , df [0 ].sort_values ())
313+ tm .assert_series_equal (ser , df .iloc [:, 0 ].sort_values ())
314+ tm .assert_series_equal (ser , df [0 ].sort_values ())
315+
316+ @pytest .mark .arm_slow
317+ def test_detect_chained_assignment_false_positives (self ):
278318
279319 # see gh-6025: false positives
280320 df = DataFrame ({"column1" : ["a" , "a" , "a" ], "column2" : [4 , 8 , 9 ]})
@@ -289,6 +329,9 @@ def random_text(nobs=100):
289329 df ["column1" ] = df ["column1" ] + "c"
290330 str (df )
291331
332+ @pytest .mark .arm_slow
333+ def test_detect_chained_assignment_undefined_column (self ):
334+
292335 # from SO:
293336 # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
294337 df = DataFrame (np .arange (0 , 9 ), columns = ["count" ])
@@ -297,6 +340,9 @@ def random_text(nobs=100):
297340 with pytest .raises (com .SettingWithCopyError , match = msg ):
298341 df .iloc [0 :5 ]["group" ] = "a"
299342
343+ @pytest .mark .arm_slow
344+ def test_detect_chained_assignment_changing_dtype (self ):
345+
300346 # Mixed type setting but same dtype & changing dtype
301347 df = DataFrame (
302348 {
@@ -324,7 +370,6 @@ def test_setting_with_copy_bug(self):
324370 )
325371 mask = pd .isna (df .c )
326372
327- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
328373 with pytest .raises (com .SettingWithCopyError , match = msg ):
329374 df [["c" ]][mask ] = df [["b" ]][mask ]
330375
@@ -342,7 +387,6 @@ def test_detect_chained_assignment_warnings_errors(self):
342387 with tm .assert_produces_warning (com .SettingWithCopyWarning ):
343388 df .loc [0 ]["A" ] = 111
344389
345- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
346390 with option_context ("chained_assignment" , "raise" ):
347391 with pytest .raises (com .SettingWithCopyError , match = msg ):
348392 df .loc [0 ]["A" ] = 111
@@ -386,6 +430,7 @@ def test_cache_updating(self):
386430 assert "Hello Friend" in df ["A" ].index
387431 assert "Hello Friend" in df ["B" ].index
388432
433+ def test_cache_updating2 (self ):
389434 # 10264
390435 df = DataFrame (
391436 np .zeros ((5 , 5 ), dtype = "int64" ),
0 commit comments