From 7d46802fae0142a0c680880899659453d86da32e Mon Sep 17 00:00:00 2001
From: Lars Reimann <mail@larsreimann.com>
Date: Mon, 27 Mar 2023 17:10:26 +0200
Subject: [PATCH 1/2] feat: function to drop columns/rows with missing values

---
 src/safeds/data/tabular/containers/_table.py  | 39 +++++++++++++++----
 .../test_drop_columns_with_missing_values.py  | 27 +++++++++++++
 .../test_drop_rows_with_missing_values.py     | 27 +++++++++++++
 3 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py
 create mode 100644 tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index f34ee6c02..61e3eb512 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -272,7 +272,7 @@ def get_column(self, column_name: str) -> Column:
         if self._schema.has_column(column_name):
             output_column = Column(
                 self._data.iloc[
-                    :, [self._schema._get_column_index_by_name(column_name)]
+                :, [self._schema._get_column_index_by_name(column_name)]
                 ].squeeze(),
                 column_name,
                 self._schema.get_type_of_column(column_name),
@@ -573,6 +573,21 @@ def drop_columns(self, column_names: list[str]) -> Table:
         )
         return Table(transformed_data)
 
+    def drop_columns_with_missing_values(self) -> Table:
+        """
+        Return a new table that keeps only the columns without missing values.
+
+        Returns
+        -------
+        table : Table
+            A table without the columns that contain missing values.
+        """
+        return Table.from_columns([
+            column
+            for column in self.to_columns()
+            if not column.has_missing_values()
+        ])
+
     def drop_columns_with_non_numerical_values(self) -> Table:
         """
         Return a table without the columns that contain non-numerical values.
@@ -593,12 +608,24 @@ def drop_duplicate_rows(self) -> Table:
         -------
         result : Table
             The table with the duplicate rows removed.
-
         """
         df = self._data.drop_duplicates(ignore_index=True)
         df.columns = self._schema.get_column_names()
         return Table(df)
 
+    def drop_rows_with_missing_values(self) -> Table:
+        """
+        Return a table without the rows that contain missing values.
+
+        Returns
+        -------
+        table : Table
+            A new table without the rows that contain missing values.
+        """
+        # dropna (inplace=False) returns a new frame, so self._data is not modified.
+        result = self._data.dropna(axis="index")
+        return Table(result, self._schema)
+
     def drop_rows_with_outliers(self) -> Table:
         """
         Remove all rows from the table that contain at least one outlier defined as having a value that has a distance
@@ -828,10 +855,7 @@ def slice(
 
     def sort_columns(
         self,
-        query: Callable[[Column, Column], int] = lambda col1, col2: (
-            col1.name > col2.name
-        )
-        - (col1.name < col2.name),
+        query: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name) - (col1.name < col2.name),
     ) -> Table:
         """
         Sort a table with the given lambda function.
@@ -868,7 +892,8 @@ def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]:
         Returns
         -------
         result : (Table, Table)
-            A tuple containing the two resulting tables. The first table has the specified size, the second table contains the rest of the data.
+            A tuple containing the two resulting tables. The first table has the specified size, the second table
+            contains the rest of the data.
 
 
         """
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py
new file mode 100644
index 000000000..41ee02574
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pandas as pd
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, TableSchema
+
+
+def test_drop_columns_with_missing_values_valid() -> None:
+    """Columns col1 and col2 contain None and must be dropped; col3 and col4 stay."""
+    frame = pd.DataFrame(
+        data={
+            "col1": [None, None, None, None],
+            "col2": [1, 2, 3, None],
+            "col3": [1, 2, 3, 4],
+            "col4": [2, 3, 1, 4],
+        }
+    )
+    table = Table(frame)
+    result = table.drop_columns_with_missing_values()
+    assert result.get_column_names() == ["col3", "col4"]
+
+
+def test_drop_columns_with_missing_values_empty() -> None:
+    """A table without rows has no missing values, so its column is kept."""
+    schema = TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
+    empty_table = Table([], schema)
+    result = empty_table.drop_columns_with_missing_values()
+    assert result.get_column_names() == ["col1"]
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py
new file mode 100644
index 000000000..80bfe3ea9
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pandas as pd
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, TableSchema
+
+
+def test_drop_rows_with_missing_values_valid() -> None:
+    """The first two rows contain None, so only the last two rows remain."""
+    frame = pd.DataFrame(
+        data={
+            "col1": [None, None, "C", "A"],
+            "col2": [None, "Test1", "Test3", "Test1"],
+            "col3": [None, 2, 3, 4],
+            "col4": [None, 3, 1, 4],
+        }
+    )
+    table = Table(frame)
+    result = table.drop_rows_with_missing_values()
+    assert result.count_rows() == 2
+
+
+def test_drop_rows_with_missing_values_empty() -> None:
+    """Dropping rows from a table without rows must keep the schema's column."""
+    schema = TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
+    empty_table = Table([], schema)
+    result = empty_table.drop_rows_with_missing_values()
+    assert result.get_column_names() == ["col1"]

From 491724fb10e4acde3ac8d8ae8e277f54d18f7860 Mon Sep 17 00:00:00 2001
From: lars-reimann <lars-reimann@users.noreply.github.com>
Date: Mon, 27 Mar 2023 15:13:51 +0000
Subject: [PATCH 2/2] style: apply automated linter fixes

---
 src/safeds/data/tabular/containers/_table.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index 61e3eb512..3b12b7dea 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -272,7 +272,7 @@ def get_column(self, column_name: str) -> Column:
         if self._schema.has_column(column_name):
             output_column = Column(
                 self._data.iloc[
-                :, [self._schema._get_column_index_by_name(column_name)]
+                    :, [self._schema._get_column_index_by_name(column_name)]
                 ].squeeze(),
                 column_name,
                 self._schema.get_type_of_column(column_name),
@@ -582,11 +582,9 @@ def drop_columns_with_missing_values(self) -> Table:
         table : Table
             A table without the columns that contain missing values.
         """
-        return Table.from_columns([
-            column
-            for column in self.to_columns()
-            if not column.has_missing_values()
-        ])
+        return Table.from_columns(
+            [column for column in self.to_columns() if not column.has_missing_values()]
+        )
 
     def drop_columns_with_non_numerical_values(self) -> Table:
         """
@@ -855,7 +853,10 @@ def slice(
 
     def sort_columns(
         self,
-        query: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name) - (col1.name < col2.name),
+        query: Callable[[Column, Column], int] = lambda col1, col2: (
+            col1.name > col2.name
+        )
+        - (col1.name < col2.name),
     ) -> Table:
         """
         Sort a table with the given lambda function.