From 38d05021ab64dce86ac750e583da4bb139733838 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Fri, 16 Jul 2021 00:36:01 +0200 Subject: [PATCH 1/9] Modified ecosystem.rst to include ibis --- doc/source/ecosystem.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index ee061e7b7d3e6..e58779c090d8f 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -445,6 +445,12 @@ provides a familiar ``DataFrame`` interface for out-of-core, parallel and distri Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow. +`Ibis `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ibis offers a standard way to write analytics code, that can be run in multiple engines. It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). + + `Koalas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 85c554c41f8d44586c689b43b69281a42ab2a0cc Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Thu, 22 Jul 2021 16:50:00 +0200 Subject: [PATCH 2/9] created a test for issue #25594 --- pandas/tests/indexing/test_loc.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6de83e34122c2..40ea27379bdc2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2768,3 +2768,24 @@ def test_loc_setitem_dict_timedelta_multiple_set(self): [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1] ) tm.assert_frame_equal(result, expected) + + + def test_loc_set_multiple_items_in_multiple_new_columns(self): + # GH 25594 + df = DataFrame(index=[1, 2], columns = ['a']) + df.loc[1, ['b', 'c']] = [6, 7] + result = df.copy() + + expected = DataFrame(index = [1, 2], columns = ['a', 'b', 'c']) + + expected.loc[1, 'b'] = 6 + expected.loc[1, 'c'] = 7 + tm.assert_frame_equal(result, expected, check_dtype = False) + + + + + + + + From c93cbd6387a7a1095b92956f43adf478753dfe89 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Thu, 22 Jul 2021 23:10:47 +0200 Subject: [PATCH 3/9] Test for issue #25594 --- pandas/tests/indexing/test_loc.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 40ea27379bdc2..885e087943a67 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2768,24 +2768,15 @@ def test_loc_setitem_dict_timedelta_multiple_set(self): [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1] ) tm.assert_frame_equal(result, expected) - def test_loc_set_multiple_items_in_multiple_new_columns(self): # GH 25594 - df = DataFrame(index=[1, 2], columns = ['a']) - df.loc[1, ['b', 'c']] = [6, 7] + df = DataFrame(index=[1, 2], columns=["a"]) + df.loc[1, ["b", "c"]] = [6, 7] result = df.copy() - - expected = DataFrame(index = [1, 2], columns = ['a', 'b', 'c']) - - expected.loc[1, 'b'] = 6 - expected.loc[1, 'c'] = 7 - tm.assert_frame_equal(result, expected, check_dtype = False) - - - - - - - - + + expected = DataFrame(index=[1, 2], columns=["a", "b", "c"]) + expected.loc[1, "b"] = 6 + expected.loc[1, "c"] = 7 + + tm.assert_frame_equal(result, expected, check_dtype=False) From 4afc50b535236e3be2eb6ece723377b4873f0be6 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Fri, 23 Jul 2021 16:22:32 +0200 Subject: [PATCH 4/9] reverted the changes --- doc/source/ecosystem.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index e58779c090d8f..ee061e7b7d3e6 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -445,12 +445,6 @@ provides a familiar ``DataFrame`` interface for out-of-core, parallel and distri Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow. -`Ibis `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Ibis offers a standard way to write analytics code, that can be run in multiple engines. It helps in bridging the gap between local Python environments (like pandas) and remote storage and execution systems like Hadoop components (like HDFS, Impala, Hive, Spark) and SQL databases (Postgres, etc.). - - `Koalas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 98047f44328fec788b460baafc434401bdd87253 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:21:14 +0200 Subject: [PATCH 5/9] Test Loc to set Multiple Items to multiple new columns - Changes Made --- pandas/tests/indexing/test_loc.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 885e087943a67..2a33d7b658ab4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2773,10 +2773,7 @@ def test_loc_set_multiple_items_in_multiple_new_columns(self): # GH 25594 df = DataFrame(index=[1, 2], columns=["a"]) df.loc[1, ["b", "c"]] = [6, 7] - result = df.copy() - expected = DataFrame(index=[1, 2], columns=["a", "b", "c"]) - expected.loc[1, "b"] = 6 - expected.loc[1, "c"] = 7 + expected = DataFrame({"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index = [1, 2]) - tm.assert_frame_equal(result, expected, check_dtype=False) + tm.assert_frame_equal(df, expected, check_dtype=False) From d44cdbac1eb2a525ac3393aacfbf717b3c1f1305 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:50:43 +0200 Subject: [PATCH 6/9] Test Loc to set Multiple Items to multiple new columns - Changes made and linting addresssed --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2a33d7b658ab4..78f1db1436fae 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2775,5 +2775,5 @@ def test_loc_set_multiple_items_in_multiple_new_columns(self): df.loc[1, ["b", "c"]] = [6, 7] expected = DataFrame({"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index = [1, 2]) - + tm.assert_frame_equal(df, expected, check_dtype=False) From 4f2a69c25f17c43eb25eafea8aee7c0ed4c4bc7e Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Tue, 27 Jul 2021 14:21:04 +0200 Subject: [PATCH 7/9] TST: Test Loc to set Multiple Items to multiple new columns - Changes Made and pre-commit --- pandas/tests/indexing/test_loc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 78f1db1436fae..aff4784ea7306 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2775,5 +2775,5 @@ def test_loc_set_multiple_items_in_multiple_new_columns(self): df.loc[1, ["b", "c"]] = [6, 7] expected = DataFrame({"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index = [1, 2]) - - tm.assert_frame_equal(df, expected, check_dtype=False) + + tm.assert_frame_equal(df, expected, check_dtype=False) \ No newline at end of file From d785568497ceb7c886bd4b9c39315a5b530a33fd Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Tue, 27 Jul 2021 14:47:00 +0200 Subject: [PATCH 8/9] TST: test for issue #25594 - Changes made --- pandas/tests/indexing/test_loc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index aff4784ea7306..4c64af34eb7b4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2774,6 +2774,8 @@ def test_loc_set_multiple_items_in_multiple_new_columns(self): df = DataFrame(index=[1, 2], columns=["a"]) df.loc[1, ["b", "c"]] = [6, 7] - expected = DataFrame({"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index = [1, 2]) + expected = DataFrame( + {"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index=[1, 2] + ) - tm.assert_frame_equal(df, expected, check_dtype=False) \ No newline at end of file + tm.assert_frame_equal(df, expected, check_dtype=False) From e654acea2c59c5ecefbdff618190e1889af72052 Mon Sep 17 00:00:00 2001 From: GYvan <78673871+GYvan@users.noreply.github.com> Date: Tue, 27 Jul 2021 17:00:49 +0200 Subject: [PATCH 9/9] removed the check_dtype --- pandas/tests/indexing/test_loc.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4c64af34eb7b4..492424255bcf2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2775,7 +2775,12 @@ def test_loc_set_multiple_items_in_multiple_new_columns(self): df.loc[1, ["b", "c"]] = [6, 7] expected = DataFrame( - {"a": [np.nan, np.nan], "b": [6, np.nan], "c": [7, np.nan]}, index=[1, 2] + { + "a": Series([np.nan, np.nan], dtype="object"), + "b": [6, np.nan], + "c": [7, np.nan], + }, + index=[1, 2], ) - tm.assert_frame_equal(df, expected, check_dtype=False) + tm.assert_frame_equal(df, expected)