diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index f34ee6c02..3b12b7dea 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -573,6 +573,19 @@ def drop_columns(self, column_names: list[str]) -> Table:
         )
         return Table(transformed_data)
 
+    def drop_columns_with_missing_values(self) -> Table:
+        """
+        Return a table without the columns that contain missing values.
+
+        Returns
+        -------
+        table : Table
+            A table without the columns that contain missing values.
+        """
+        return Table.from_columns(
+            [column for column in self.to_columns() if not column.has_missing_values()]
+        )
+
     def drop_columns_with_non_numerical_values(self) -> Table:
         """
         Return a table without the columns that contain non-numerical values.
@@ -593,12 +606,24 @@ def drop_duplicate_rows(self) -> Table:
         -------
         result : Table
             The table with the duplicate rows removed.
-
         """
         df = self._data.drop_duplicates(ignore_index=True)
         df.columns = self._schema.get_column_names()
         return Table(df)
 
+    def drop_rows_with_missing_values(self) -> Table:
+        """
+        Return a table without the rows that contain missing values.
+
+        Returns
+        -------
+        table : Table
+            A table without the rows that contain missing values.
+        """
+        # dropna already returns a new frame (no copy needed); renumber rows like drop_duplicate_rows.
+        result = self._data.dropna(axis="index").reset_index(drop=True)
+        return Table(result, self._schema)
+
     def drop_rows_with_outliers(self) -> Table:
         """
         Remove all rows from the table that contain at least one outlier defined as having a value that has a distance
@@ -868,7 +893,8 @@ def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]:
         Returns
         -------
         result : (Table, Table)
-            A tuple containing the two resulting tables. The first table has the specified size, the second table contains the rest of the data.
+            A tuple containing the two resulting tables. The first table has the specified size, the second table
+            contains the rest of the data.
 
 
         """
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py
new file mode 100644
index 000000000..41ee02574
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_missing_values.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pandas as pd
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, TableSchema
+
+
+def test_drop_columns_with_missing_values_valid() -> None:
+    table = Table(
+        pd.DataFrame(
+            data={
+                "col1": [None, None, None, None],
+                "col2": [1, 2, 3, None],
+                "col3": [1, 2, 3, 4],
+                "col4": [2, 3, 1, 4],
+            }
+        )
+    )
+    updated_table = table.drop_columns_with_missing_values()
+    assert updated_table.get_column_names() == ["col3", "col4"]
+
+
+def test_drop_columns_with_missing_values_empty() -> None:
+    table = Table(
+        [], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
+    )
+    updated_table = table.drop_columns_with_missing_values()
+    assert updated_table.get_column_names() == ["col1"]
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py
new file mode 100644
index 000000000..80bfe3ea9
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_rows_with_missing_values.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pandas as pd
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, TableSchema
+
+
+def test_drop_rows_with_missing_values_valid() -> None:
+    table = Table(
+        pd.DataFrame(
+            data={
+                "col1": [None, None, "C", "A"],
+                "col2": [None, "Test1", "Test3", "Test1"],
+                "col3": [None, 2, 3, 4],
+                "col4": [None, 3, 1, 4],
+            }
+        )
+    )
+    updated_table = table.drop_rows_with_missing_values()
+    assert updated_table.count_rows() == 2
+
+
+def test_drop_rows_with_missing_values_empty() -> None:
+    table = Table(
+        [], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
+    )
+    updated_table = table.drop_rows_with_missing_values()
+    assert updated_table.get_column_names() == ["col1"]