Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,19 @@ def drop_columns(self, column_names: list[str]) -> Table:
)
return Table(transformed_data)

def drop_columns_with_missing_values(self) -> Table:
    """
    Build a new table that keeps only the columns free of missing values.

    Returns
    -------
    table : Table
        A table made of the columns for which `has_missing_values()` is false.
    """
    complete_columns = []
    for column in self.to_columns():
        # Skip every column that reports at least one missing value.
        if column.has_missing_values():
            continue
        complete_columns.append(column)
    return Table.from_columns(complete_columns)

def drop_columns_with_non_numerical_values(self) -> Table:
"""
Return a table without the columns that contain non-numerical values.
Expand All @@ -593,12 +606,24 @@ def drop_duplicate_rows(self) -> Table:
-------
result : Table
The table with the duplicate rows removed.

"""
df = self._data.drop_duplicates(ignore_index=True)
df.columns = self._schema.get_column_names()
return Table(df)

def drop_rows_with_missing_values(self) -> Table:
    """
    Return a table without the rows that contain missing values.

    The data held by this table is not modified; the result is built from a new DataFrame.

    Returns
    -------
    table : Table
        A table without the rows that contain missing values. It is created with the same schema as this table.
    """
    # `dropna` is not called with `inplace=True`, so it already returns a new DataFrame and leaves
    # `self._data` alone. Deep-copying the whole frame beforehand only doubled the memory use.
    # NOTE(review): the row index is not renumbered here (drop_duplicate_rows uses ignore_index=True);
    # confirm whether Table expects a contiguous index.
    remaining_rows = self._data.dropna(axis="index")
    return Table(remaining_rows, self._schema)

def drop_rows_with_outliers(self) -> Table:
"""
Remove all rows from the table that contain at least one outlier defined as having a value that has a distance
Expand Down Expand Up @@ -868,7 +893,8 @@ def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]:
Returns
-------
result : (Table, Table)
A tuple containing the two resulting tables. The first table has the specified size, the second table contains the rest of the data.
A tuple containing the two resulting tables. The first table has the specified size, the second table
contains the rest of the data.


"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_columns_with_missing_values_valid() -> None:
    """Only the columns without any missing value ("col3" and "col4") remain, in their original order."""
    frame = pd.DataFrame(
        data={
            "col1": [None, None, None, None],  # entirely missing
            "col2": [1, 2, 3, None],  # one missing value
            "col3": [1, 2, 3, 4],
            "col4": [2, 3, 1, 4],
        }
    )
    remaining_names = Table(frame).drop_columns_with_missing_values().get_column_names()
    assert remaining_names == ["col3", "col4"]


def test_drop_columns_with_missing_values_empty() -> None:
    """A table without rows keeps its single column "col1"."""
    schema = TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
    empty_table = Table([], schema)
    result = empty_table.drop_columns_with_missing_values()
    assert result.get_column_names() == ["col1"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import ColumnType, TableSchema


def test_drop_rows_with_missing_values_valid() -> None:
    """Of the four rows, the two that contain a missing value are dropped, leaving two rows."""
    frame = pd.DataFrame(
        data={
            # Row 0 is missing everywhere; row 1 is missing only in "col1".
            "col1": [None, None, "C", "A"],
            "col2": [None, "Test1", "Test3", "Test1"],
            "col3": [None, 2, 3, 4],
            "col4": [None, 3, 1, 4],
        }
    )
    remaining_row_count = Table(frame).drop_rows_with_missing_values().count_rows()
    assert remaining_row_count == 2


def test_drop_rows_with_missing_values_empty() -> None:
    """A table without rows still exposes its column "col1" after dropping rows."""
    schema = TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
    empty_table = Table([], schema)
    result = empty_table.drop_rows_with_missing_values()
    assert result.get_column_names() == ["col1"]