From f9f9483bd1d1ff45001098809719545959d3fa7a Mon Sep 17 00:00:00 2001
From: Lars Reimann <mail@larsreimann.com>
Date: Mon, 27 Mar 2023 16:25:53 +0200
Subject: [PATCH 1/3] style: structure methods of `Table`

---
 src/safeds/data/tabular/containers/_table.py  | 987 +++++++++---------
 .../data/tabular/transformation/_imputer.py   |   4 +-
 ..._drop_columns_with_non_numerical_values.py |  30 +
 3 files changed, 549 insertions(+), 472 deletions(-)
 create mode 100644 tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index 1eed44683..de05e8fa3 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -12,6 +12,8 @@
 import seaborn as sns
 from IPython.core.display_functions import DisplayHandle, display
 from pandas import DataFrame, Series
+from scipy import stats
+
 from safeds.data.tabular.containers._column import Column
 from safeds.data.tabular.containers._row import Row
 from safeds.data.tabular.typing import ColumnType, TableSchema
@@ -26,7 +28,6 @@
     SchemaMismatchError,
     UnknownColumnNameError,
 )
-from scipy import stats
 
 
 # noinspection PyProtectedMember
@@ -47,44 +48,39 @@ class Table:
         If the table is empty and no schema is specified.
     """
 
-    def __init__(self, data: typing.Iterable, schema: Optional[TableSchema] = None):
-        self._data: pd.Dataframe = (
-            data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
-        )
-        if schema is None:
-            if self.count_columns() == 0:
-                raise MissingSchemaError()
-            self.schema: TableSchema = TableSchema._from_dataframe(self._data)
-        else:
-            self.schema = schema
-            if self._data.empty:
-                self._data = pd.DataFrame(columns=self.schema.get_column_names())
-
-        self._data = self._data.reset_index(drop=True)
-        self._data.columns = list(range(self.count_columns()))
+    # ------------------------------------------------------------------------------------------------------------------
+    # Creation
+    # ------------------------------------------------------------------------------------------------------------------
 
-    def get_row(self, index: int) -> Row:
+    @staticmethod
+    def from_csv(path: str) -> Table:
         """
-        Return the row at a specified index.
+        Read data from a CSV file into a table.
 
         Parameters
         ----------
-        index : int
-            The index.
+        path : str
+            The path to the CSV file.
 
         Returns
         -------
-        row : Row
-            The row of the table at the index.
+        table : Table
+            The table created from the CSV file.
 
         Raises
         ------
-        IndexOutOfBoundsError
-            If no row at the specified index exists in this table.
+        FileNotFoundError
+            If the specified file does not exist.
+        ValueError
+            If the file could not be read.
         """
-        if len(self._data.index) - 1 < index or index < 0:
-            raise IndexOutOfBoundsError(index)
-        return Row(self._data.iloc[[index]].squeeze(), self.schema)
+
+        try:
+            return Table(pd.read_csv(path))
+        except FileNotFoundError as exception:
+            raise FileNotFoundError(f'File "{path}" does not exist') from exception
+        except Exception as exception:
+            raise ValueError(f'Could not read file from "{path}" as CSV') from exception
 
     @staticmethod
     def from_json(path: str) -> Table:
@@ -119,34 +115,42 @@ def from_json(path: str) -> Table:
             ) from exception
 
     @staticmethod
-    def from_csv(path: str) -> Table:
+    def from_columns(columns: list[Column]) -> Table:
         """
-        Read data from a CSV file into a table.
+        Return a table created from a list of columns.
 
         Parameters
         ----------
-        path : str
-            The path to the CSV file.
+        columns : list[Column]
+            The columns to be combined. They need to have the same size.
 
         Returns
         -------
         table : Table
-            The table created from the CSV file.
+            The generated table.
 
         Raises
         ------
-        FileNotFoundError
-            If the specified file does not exist.
-        ValueError
-            If the file could not be read.
+        MissingDataError
+            If an empty list is given.
+        ColumnLengthMismatchError
+            If any of the column sizes does not match with the others.
         """
+        if len(columns) == 0:
+            raise MissingDataError("This function requires at least one column.")
 
-        try:
-            return Table(pd.read_csv(path))
-        except FileNotFoundError as exception:
-            raise FileNotFoundError(f'File "{path}" does not exist') from exception
-        except Exception as exception:
-            raise ValueError(f'Could not read file from "{path}" as CSV') from exception
+        dataframe: DataFrame = pd.DataFrame()
+
+        for column in columns:
+            if column._data.size != columns[0]._data.size:
+                raise ColumnLengthMismatchError(
+                    "\n".join(
+                        [f"{column.name}: {column._data.size}" for column in columns]
+                    )
+                )
+            dataframe[column.name] = column._data
+
+        return Table(dataframe)
 
     @staticmethod
     def from_rows(rows: list[Row]) -> Table:
@@ -185,109 +189,67 @@ def from_rows(rows: list[Row]) -> Table:
         dataframe.columns = schema_compare.get_column_names()
         return Table(dataframe)
 
-    @staticmethod
-    def from_columns(columns: list[Column]) -> Table:
-        """
-        Return a table created from a list of columns.
-
-        Parameters
-        ----------
-        columns : list[Column]
-            The columns to be combined. They need to have the same size.
-
-        Returns
-        -------
-        table : Table
-            The generated table.
-
-        Raises
-        ------
-        MissingDataError
-            If an empty list is given.
-        ColumnLengthMismatchError
-            If any of the column sizes does not match with the others.
-        """
-        if len(columns) == 0:
-            raise MissingDataError("This function requires at least one column.")
+    # ------------------------------------------------------------------------------------------------------------------
+    # Dunder methods
+    # ------------------------------------------------------------------------------------------------------------------
 
-        dataframe: DataFrame = pd.DataFrame()
+    def __init__(self, data: typing.Iterable, schema: Optional[TableSchema] = None):
+        self._data: pd.Dataframe = (
+            data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
+        )
+        if schema is None:
+            if self.count_columns() == 0:
+                raise MissingSchemaError()
+            self._schema: TableSchema = TableSchema._from_dataframe(self._data)
+        else:
+            self._schema = schema
+            if self._data.empty:
+                self._data = pd.DataFrame(columns=self._schema.get_column_names())
 
-        for column in columns:
-            if column._data.size != columns[0]._data.size:
-                raise ColumnLengthMismatchError(
-                    "\n".join(
-                        [f"{column.name}: {column._data.size}" for column in columns]
-                    )
-                )
-            dataframe[column.name] = column._data
+        self._data = self._data.reset_index(drop=True)
+        self._data.columns = list(range(self.count_columns()))
 
-        return Table(dataframe)
+    def __eq__(self, other: typing.Any) -> bool:
+        if not isinstance(other, Table):
+            return NotImplemented
+        if self is other:
+            return True
+        table1 = self.sort_columns()
+        table2 = other.sort_columns()
+        return table1._data.equals(table2._data) and table1._schema == table2._schema
 
-    def to_json(self, path_to_file: str) -> None:
-        """
-        Write the data from the table into a JSON file.
-        If the file and/or the directories do not exist, they will be created.
-        If the file already exists it will be overwritten.
+    def __hash__(self) -> int:
+        return hash(self._data)
 
-        Parameters
-        ----------
-        path_to_file : str
-            The path to the output file.
-        """
-        Path(os.path.dirname(path_to_file)).mkdir(parents=True, exist_ok=True)
-        data_to_json = self._data.copy()
-        data_to_json.columns = self.schema.get_column_names()
-        data_to_json.to_json(path_to_file)
+    def __repr__(self) -> str:
+        tmp = self._data.copy(deep=True)
+        tmp.columns = self.get_column_names()
+        return tmp.__repr__()
 
-    def to_csv(self, path_to_file: str) -> None:
-        """
-        Write the data from the table into a CSV file.
-        If the file and/or the directories do not exist they will be created.
-        If the file already exists it will be overwritten.
+    def __str__(self) -> str:
+        tmp = self._data.copy(deep=True)
+        tmp.columns = self.get_column_names()
+        return tmp.__str__()
 
-        Parameters
-        ----------
-        path_to_file : str
-            The path to the output file.
-        """
-        Path(os.path.dirname(path_to_file)).mkdir(parents=True, exist_ok=True)
-        data_to_csv = self._data.copy()
-        data_to_csv.columns = self.schema.get_column_names()
-        data_to_csv.to_csv(path_to_file, index=False)
+    # ------------------------------------------------------------------------------------------------------------------
+    # Properties
+    # ------------------------------------------------------------------------------------------------------------------
 
-    def rename_column(self, old_name: str, new_name: str) -> Table:
+    @property
+    def schema(self) -> TableSchema:
         """
-        Rename a single column.
-
-        Parameters
-        ----------
-        old_name : str
-            The old name of the target column
-        new_name : str
-            The new name of the target column
+        Return the schema of the table.
 
         Returns
         -------
-        table : Table
-            The Table with the renamed column.
-
-        Raises
-        ------
-        ColumnNameError
-            If the specified old target column name does not exist.
-        DuplicateColumnNameError
-            If the specified new target column name already exists.
+        schema : TableSchema
+            The schema.
         """
-        if old_name not in self.schema.get_column_names():
-            raise UnknownColumnNameError([old_name])
-        if old_name == new_name:
-            return self
-        if new_name in self.schema.get_column_names():
-            raise DuplicateColumnNameError(new_name)
+        return self._schema
 
-        new_df = self._data.copy()
-        new_df.columns = self.schema.get_column_names()
-        return Table(new_df.rename(columns={old_name: new_name}))
+    # ------------------------------------------------------------------------------------------------------------------
+    # Getters
+    # ------------------------------------------------------------------------------------------------------------------
 
     def get_column(self, column_name: str) -> Column:
         """
@@ -308,120 +270,95 @@ def get_column(self, column_name: str) -> Column:
         UnknownColumnNameError
             If the specified target column name does not exist.
         """
-        if self.schema.has_column(column_name):
+        if self._schema.has_column(column_name):
             output_column = Column(
                 self._data.iloc[
-                    :, [self.schema._get_column_index_by_name(column_name)]
+                :, [self._schema._get_column_index_by_name(column_name)]
                 ].squeeze(),
                 column_name,
-                self.schema.get_type_of_column(column_name),
+                self._schema.get_type_of_column(column_name),
             )
             return output_column
 
         raise UnknownColumnNameError([column_name])
 
-    def drop_columns(self, column_names: list[str]) -> Table:
+    def has_column(self, column_name: str) -> bool:
         """
-        Return a table without the given column(s).
+        Return whether the table contains a given column.
+        Alias for self.schema.has_column(column_name: str) -> bool.
 
         Parameters
         ----------
+        column_name : str
+            The name of the column.
+
+        Returns
+        -------
+        contains : bool
+            True if the column exists.
+        """
+        return self._schema.has_column(column_name)
+
+    def get_column_names(self) -> list[str]:
+        """
+        Return a list of all column names in this table.
+        Alias for self.schema.get_column_names() -> list[str].
+
+        Returns
+        -------
         column_names : list[str]
-            A list containing all columns to be dropped.
+            The list of the column names.
+        """
+        return self._schema.get_column_names()
+
+    def get_type_of_column(self, column_name: str) -> ColumnType:
+        """
+        Return the type of the given column.
+        Alias for self.schema.get_type_of_column(column_name: str) -> ColumnType.
+
+        Parameters
+        ----------
+        column_name : str
+            The name of the column to be queried.
 
         Returns
         -------
-        table : Table
-            A table without the given columns.
+        type : ColumnType
+            The type of the column.
 
         Raises
         ------
         ColumnNameError
-            If any of the given columns do not exist.
+            If the specified target column name does not exist.
         """
-        invalid_columns = []
-        column_indices = []
-        for name in column_names:
-            if not self.schema.has_column(name):
-                invalid_columns.append(name)
-            else:
-                column_indices.append(self.schema._get_column_index_by_name(name))
-        if len(invalid_columns) != 0:
-            raise UnknownColumnNameError(invalid_columns)
-        transformed_data = self._data.drop(labels=column_indices, axis="columns")
-        transformed_data.columns = list(
-            name for name in self.schema.get_column_names() if name not in column_names
-        )
-        return Table(transformed_data)
+        return self._schema.get_type_of_column(column_name)
 
-    def keep_columns(self, column_names: list[str]) -> Table:
+    def get_row(self, index: int) -> Row:
         """
-        Return a table with only the given column(s).
+        Return the row at a specified index.
 
         Parameters
         ----------
-        column_names : list[str]
-            A list containing only the columns to be kept.
+        index : int
+            The index.
 
         Returns
         -------
-        table : Table
-            A table containing only the given column(s).
+        row : Row
+            The row of the table at the index.
 
         Raises
         ------
-        ColumnNameError
-            If any of the given columns do not exist.
-        """
-        invalid_columns = []
-        column_indices = []
-        for name in column_names:
-            if not self.schema.has_column(name):
-                invalid_columns.append(name)
-            else:
-                column_indices.append(self.schema._get_column_index_by_name(name))
-        if len(invalid_columns) != 0:
-            raise UnknownColumnNameError(invalid_columns)
-        transformed_data = self._data[column_indices]
-        transformed_data.columns = list(
-            name for name in self.schema.get_column_names() if name in column_names
-        )
-        return Table(transformed_data)
-
-    def to_rows(self) -> list[Row]:
-        """
-        Return a list of the rows.
-
-        Returns
-        -------
-        rows : list[Row]
-            List of rows.
-        """
-        return [
-            Row(series_row, self.schema) for (_, series_row) in self._data.iterrows()
-        ]
-
-    def filter_rows(self, query: Callable[[Row], bool]) -> Table:
-        """
-        Return a table with rows filtered by Callable (e.g. lambda function).
-
-        Parameters
-        ----------
-        query : lambda function
-            A Callable that is applied to all rows.
-
-        Returns
-        -------
-        table : Table
-            A table containing only the rows filtered by the query.
+        IndexOutOfBoundsError
+            If no row at the specified index exists in this table.
         """
+        if len(self._data.index) - 1 < index or index < 0:
+            raise IndexOutOfBoundsError(index)
+        return Row(self._data.iloc[[index]].squeeze(), self._schema)
 
-        rows: list[Row] = [row for row in self.to_rows() if query(row)]
-        if len(rows) == 0:
-            result_table = Table([], self.schema)
-        else:
-            result_table = self.from_rows(rows)
-        return result_table
+    # ------------------------------------------------------------------------------------------------------------------
+    # Information
+    # ------------------------------------------------------------------------------------------------------------------
 
     def count_rows(self) -> int:
         """
@@ -445,82 +382,53 @@ def count_columns(self) -> int:
         """
         return self._data.shape[1]
 
-    def to_columns(self) -> list[Column]:
-        """
-        Return a list of the columns.
-
-        Returns
-        -------
-        columns : list[Columns]
-            List of columns.
-        """
-        return [self.get_column(name) for name in self.schema.get_column_names()]
-
-    def drop_duplicate_rows(self) -> Table:
+    def summary(self) -> Table:
         """
-        Return a copy of the table with every duplicate row removed.
+        Return a table with a number of statistical key values.
 
         Returns
         -------
         result : Table
-            The table with the duplicate rows removed.
-
-        """
-        df = self._data.drop_duplicates(ignore_index=True)
-        df.columns = self.schema.get_column_names()
-        return Table(df)
-
-    def replace_column(self, old_column_name: str, new_column: Column) -> Table:
+            The table with statistics.
         """
-        Return a copy of the table with the specified old column replaced by a new column. Keeps the order of columns.
-
-        Parameters
-        ----------
-        old_column_name : str
-            The name of the column to be replaced.
-
-        new_column : Column
-            The new column replacing the old column.
 
-        Returns
-        -------
-        result : Table
-            A table with the old column replaced by the new column.
-
-        Raises
-        ------
-        UnknownColumnNameError
-            If the old column does not exist.
-
-        DuplicateColumnNameError
-            If the new column already exists and the existing column is not affected by the replacement.
+        columns = self.to_columns()
+        result = pd.DataFrame()
+        statistics = {}
 
-        ColumnSizeError
-            If the size of the column does not match the amount of rows.
-        """
-        if old_column_name not in self.schema.get_column_names():
-            raise UnknownColumnNameError([old_column_name])
+        for column in columns:
+            statistics = {
+                "maximum": column.maximum,
+                "minimum": column.minimum,
+                "mean": column.mean,
+                "mode": column.mode,
+                "median": column.median,
+                "sum": column.sum,
+                "variance": column.variance,
+                "standard deviation": column.standard_deviation,
+                "idness": column.idness,
+                "stability": column.stability,
+                "row count": column.count,
+            }
+            values = []
 
-        if (
-            new_column.name in self.schema.get_column_names()
-            and new_column.name != old_column_name
-        ):
-            raise DuplicateColumnNameError(new_column.name)
+            for function in statistics.values():
+                try:
+                    values.append(str(function()))
+                except NonNumericColumnError:
+                    values.append("-")
 
-        if self.count_rows() != new_column._data.size:
-            raise ColumnSizeError(str(self.count_rows()), str(new_column._data.size))
+            result = pd.concat([result, pd.DataFrame(values)], axis=1)
 
-        if old_column_name != new_column.name:
-            renamed_table = self.rename_column(old_column_name, new_column.name)
-            result = renamed_table._data
-            result.columns = renamed_table.schema.get_column_names()
-        else:
-            result = self._data.copy()
-            result.columns = self.schema.get_column_names()
+        result = pd.concat([pd.DataFrame(list(statistics.keys())), result], axis=1)
+        result.columns = ["metrics"] + self.get_column_names()
 
-        result[new_column.name] = new_column._data
         return Table(result)
 
+    # ------------------------------------------------------------------------------------------------------------------
+    # Transformations
+    # ------------------------------------------------------------------------------------------------------------------
+
     def add_column(self, column: Column) -> Table:
         """
         Return the original table with the provided column attached at the end.
@@ -539,14 +447,14 @@ def add_column(self, column: Column) -> Table:
             If the size of the column does not match the amount of rows.
 
         """
-        if self.schema.has_column(column.name):
+        if self._schema.has_column(column.name):
             raise DuplicateColumnNameError(column.name)
 
         if column._data.size != self.count_rows():
             raise ColumnSizeError(str(self.count_rows()), str(column._data.size))
 
         result = self._data.copy()
-        result.columns = self.schema.get_column_names()
+        result.columns = self._schema.get_column_names()
         result[column.name] = column._data
         return Table(result)
 
@@ -574,7 +482,7 @@ def add_columns(self, columns: Union[list[Column], Table]) -> Table:
         if isinstance(columns, Table):
             columns = columns.to_columns()
         result = self._data.copy()
-        result.columns = self.schema.get_column_names()
+        result.columns = self._schema.get_column_names()
         for column in columns:
             if column.name in result.columns:
                 raise DuplicateColumnNameError(column.name)
@@ -600,10 +508,10 @@ def add_row(self, row: Row) -> Table:
             A new table with the added row at the end.
 
         """
-        if self.schema != row.schema:
+        if self._schema != row.schema:
             raise SchemaMismatchError()
         new_df = pd.concat([self._data, row._data.to_frame().T]).infer_objects()
-        new_df.columns = self.schema.get_column_names()
+        new_df.columns = self._schema.get_column_names()
         return Table(new_df)
 
     def add_rows(self, rows: Union[list[Row], Table]) -> Table:
@@ -624,269 +532,240 @@ def add_rows(self, rows: Union[list[Row], Table]) -> Table:
             rows = rows.to_rows()
         result = self._data
         for row in rows:
-            if self.schema != row.schema:
+            if self._schema != row.schema:
                 raise SchemaMismatchError()
         result = pd.concat(
             [result, *[row._data.to_frame().T for row in rows]]
         ).infer_objects()
-        result.columns = self.schema.get_column_names()
+        result.columns = self._schema.get_column_names()
         return Table(result)
 
-    def has_column(self, column_name: str) -> bool:
+    def drop_columns(self, column_names: list[str]) -> Table:
         """
-        Return whether the table contains a given column.
-        Alias for self.schema.hasColumn(column_name: str) -> bool.
+        Return a table without the given column(s).
 
         Parameters
         ----------
-        column_name : str
-            The name of the column.
+        column_names : list[str]
+            A list containing all columns to be dropped.
 
         Returns
         -------
-        contains : bool
-            True if the column exists.
+        table : Table
+            A table without the given columns.
+
+        Raises
+        ------
+        UnknownColumnNameError
+            If any of the given columns do not exist.
         """
-        return self.schema.has_column(column_name)
+        invalid_columns = []
+        column_indices = []
+        for name in column_names:
+            if not self._schema.has_column(name):
+                invalid_columns.append(name)
+            else:
+                column_indices.append(self._schema._get_column_index_by_name(name))
+        if len(invalid_columns) != 0:
+            raise UnknownColumnNameError(invalid_columns)
+        transformed_data = self._data.drop(labels=column_indices, axis="columns")
+        transformed_data.columns = list(
+            name for name in self._schema.get_column_names() if name not in column_names
+        )
+        return Table(transformed_data)
 
-    def list_columns_with_missing_values(self) -> list[Column]:
+    def drop_duplicate_rows(self) -> Table:
         """
-        Return a list of all the columns that have at least one missing value. Returns an empty list if there are none.
+        Return a copy of the table with every duplicate row removed.
 
         Returns
         -------
-        columns_with_missing_values: list[Column]
-            The list of columns with missing values.
+        result : Table
+            The table with the duplicate rows removed.
+
         """
-        columns = self.to_columns()
-        columns_with_missing_values = []
-        for column in columns:
-            if column.has_missing_values():
-                columns_with_missing_values.append(column)
-        return columns_with_missing_values
+        df = self._data.drop_duplicates(ignore_index=True)
+        df.columns = self._schema.get_column_names()
+        return Table(df)
 
-    def list_columns_with_non_numerical_values(self) -> list[Column]:
+    def drop_rows_with_outliers(self) -> Table:
         """
-        Return a list of columns only containing non-numerical values.
+        Return a new table without the rows that contain at least one outlier. A value is treated as an outlier if it
+        lies 3 or more standard deviations away from the column average.
 
         Returns
         -------
-        cols : list[Column]
-            The list with only non-numerical columns.
+        new_table : Table
+            A new table without rows containing outliers.
         """
-        cols = []
-        for column_name, data_type in self.schema._schema.items():
-            if not data_type.is_numeric():
-                cols.append(self.get_column(column_name))
-        return cols
+        result = self._data.copy(deep=True)
 
-    def list_columns_with_numerical_values(self) -> list[Column]:
-        """
-        Return a list of columns only containing numerical values.
+        table_without_nonnumericals = Table.from_columns(
+            self.list_columns_with_numerical_values()
+        )
 
-        Returns
-        -------
-        cols : list[Column]
-            The list with only numerical columns.
-        """
-        cols = []
-        for column_name, data_type in self.schema._schema.items():
-            if data_type.is_numeric():
-                cols.append(self.get_column(column_name))
-        return cols
+        result = result[
+            (np.absolute(stats.zscore(table_without_nonnumericals._data)) < 3).all(
+                axis=1
+            )
+        ]
 
-    def get_column_names(self) -> list[str]:
+        return Table(result, self._schema)
+
+    def filter_rows(self, query: Callable[[Row], bool]) -> Table:
         """
-        Return a list of all column names in this table.
-        Alias for self.schema.get_column_names() -> list[str].
+        Return a table with rows filtered by Callable (e.g. lambda function).
+
+        Parameters
+        ----------
+        query : lambda function
+            A Callable that is applied to all rows.
 
         Returns
         -------
-        column_names : list[str]
-            The list of the column names.
+        table : Table
+            A table containing only the rows filtered by the query.
         """
-        return self.schema.get_column_names()
 
-    def get_type_of_column(self, column_name: str) -> ColumnType:
+        rows: list[Row] = [row for row in self.to_rows() if query(row)]
+        if len(rows) == 0:
+            result_table = Table([], self._schema)
+        else:
+            result_table = self.from_rows(rows)
+        return result_table
+
+    def keep_columns(self, column_names: list[str]) -> Table:
         """
-        Return the type of the given column.
-        Alias for self.schema.get_type_of_column(column_name: str) -> ColumnType.
+        Return a table with only the given column(s).
 
         Parameters
         ----------
-        column_name : str
-            The name of the column to be queried.
+        column_names : list[str]
+            A list containing only the columns to be kept.
 
         Returns
         -------
-        type : ColumnType
-            The type of the column.
+        table : Table
+            A table containing only the given column(s).
 
         Raises
         ------
         ColumnNameError
-            If the specified target column name does not exist.
+            If any of the given columns do not exist.
         """
-        return self.schema.get_type_of_column(column_name)
-
-    def sort_columns(
-        self,
-        query: Callable[[Column, Column], int] = lambda col1, col2: (
-            col1.name > col2.name
-        )
-        - (col1.name < col2.name),
-    ) -> Table:
+        invalid_columns = []
+        column_indices = []
+        for name in column_names:
+            if not self._schema.has_column(name):
+                invalid_columns.append(name)
+            else:
+                column_indices.append(self._schema._get_column_index_by_name(name))
+        if len(invalid_columns) != 0:
+            raise UnknownColumnNameError(invalid_columns)
+        transformed_data = self._data[column_indices]
+        transformed_data.columns = list(
+            name for name in self._schema.get_column_names() if name in column_names
+        )
+        return Table(transformed_data)
+
+    def rename_column(self, old_name: str, new_name: str) -> Table:
         """
-        Sort a table with the given lambda function.
-        If no function is given the columns will be sorted alphabetically.
-        This function uses the default python sort algorithm.
-        The query returns
-        0, if both columns are equal.
-        < 0, if the first column should be ordered after the second column.
-        > 0, if the first column should be ordered before the second column.
+        Rename a single column.
 
         Parameters
         ----------
-        query : a lambda function
-            The lambda function used to sort the columns.
+        old_name : str
+            The old name of the target column
+        new_name : str
+            The new name of the target column
 
         Returns
         -------
-        new_table : Table
-            A new table with sorted columns.
-        """
-        columns = self.to_columns()
-        columns.sort(key=functools.cmp_to_key(query))
-        return Table.from_columns(columns)
-
-    def drop_rows_with_outliers(self) -> Table:
-        """
-        Remove all rows from the table that contain at least one outlier defined as having a value that has a distance
-        of more than 3 standard deviations from the column average.
+        table : Table
+            The Table with the renamed column.
 
-        Returns
-        -------
-        new_table : Table
-            A new table without rows containing outliers.
+        Raises
+        ------
+        UnknownColumnNameError
+            If the specified old target column name does not exist.
+        DuplicateColumnNameError
+            If the specified new target column name already exists.
         """
-        result = self._data.copy(deep=True)
-
-        table_without_nonnumericals = Table.from_columns(
-            self.list_columns_with_numerical_values()
-        )
-
-        result = result[
-            (np.absolute(stats.zscore(table_without_nonnumericals._data)) < 3).all(
-                axis=1
-            )
-        ]
+        if old_name not in self._schema.get_column_names():
+            raise UnknownColumnNameError([old_name])
+        if old_name == new_name:
+            return self
+        if new_name in self._schema.get_column_names():
+            raise DuplicateColumnNameError(new_name)
 
-        return Table(result, self.schema)
+        new_df = self._data.copy()
+        new_df.columns = self._schema.get_column_names()
+        return Table(new_df.rename(columns={old_name: new_name}))
 
-    def __eq__(self, other: typing.Any) -> bool:
-        if not isinstance(other, Table):
-            return NotImplemented
-        if self is other:
-            return True
-        table1 = self.sort_columns()
-        table2 = other.sort_columns()
-        return table1._data.equals(table2._data) and table1.schema == table2.schema
+    def replace_column(self, old_column_name: str, new_column: Column) -> Table:
+        """
+        Return a copy of the table with the specified old column replaced by a new column. Keeps the order of columns.
 
-    def __hash__(self) -> int:
-        return hash(self._data)
+        Parameters
+        ----------
+        old_column_name : str
+            The name of the column to be replaced.
 
-    def transform_column(
-        self, name: str, transformer: Callable[[Row], typing.Any]
-    ) -> Table:
-        """
-        Transform provided column by calling provided transformer.
+        new_column : Column
+            The new column replacing the old column.
 
         Returns
         -------
         result : Table
-            The table with the transformed column.
+            A table with the old column replaced by the new column.
 
         Raises
         ------
         UnknownColumnNameError
-            If the column does not exist.
-
-        """
-        if self.has_column(name):
-            items: list = [transformer(item) for item in self.to_rows()]
-            result: Column = Column(pd.Series(items), name)
-            return self.replace_column(name, result)
-        raise UnknownColumnNameError([name])
+            If the old column does not exist.
 
-    def summary(self) -> Table:
-        """
-        Return a table with a number of statistical key values.
+        DuplicateColumnNameError
+            If the new column already exists and the existing column is not affected by the replacement.
 
-        Returns
-        -------
-        result : Table
-            The table with statistics.
+        ColumnSizeError
+            If the size of the column does not match the amount of rows.
         """
+        if old_column_name not in self._schema.get_column_names():
+            raise UnknownColumnNameError([old_column_name])
 
-        columns = self.to_columns()
-        result = pd.DataFrame()
-        statistics = {}
-
-        for column in columns:
-            statistics = {
-                "maximum": column.maximum,
-                "minimum": column.minimum,
-                "mean": column.mean,
-                "mode": column.mode,
-                "median": column.median,
-                "sum": column.sum,
-                "variance": column.variance,
-                "standard deviation": column.standard_deviation,
-                "idness": column.idness,
-                "stability": column.stability,
-                "row count": column.count,
-            }
-            values = []
-
-            for function in statistics.values():
-                try:
-                    values.append(str(function()))
-                except NonNumericColumnError:
-                    values.append("-")
+        if (
+            new_column.name in self._schema.get_column_names()
+            and new_column.name != old_column_name
+        ):
+            raise DuplicateColumnNameError(new_column.name)
 
-            result = pd.concat([result, pd.DataFrame(values)], axis=1)
+        if self.count_rows() != new_column._data.size:
+            raise ColumnSizeError(str(self.count_rows()), str(new_column._data.size))
 
-        result = pd.concat([pd.DataFrame(list(statistics.keys())), result], axis=1)
-        result.columns = ["metrics"] + self.get_column_names()
+        if old_column_name != new_column.name:
+            renamed_table = self.rename_column(old_column_name, new_column.name)
+            result = renamed_table._data
+            result.columns = renamed_table._schema.get_column_names()
+        else:
+            result = self._data.copy()
+            result.columns = self._schema.get_column_names()
 
+        result[new_column.name] = new_column._data
         return Table(result)
 
-    def __repr__(self) -> str:
-        tmp = self._data.copy(deep=True)
-        tmp.columns = self.get_column_names()
-        return tmp.__repr__()
-
-    def __str__(self) -> str:
-        tmp = self._data.copy(deep=True)
-        tmp.columns = self.get_column_names()
-        return tmp.__str__()
-
-    def _ipython_display_(self) -> DisplayHandle:
+    def shuffle(self) -> Table:
         """
-        Return a display object for the column to be used in Jupyter Notebooks.
+        Shuffle the table randomly.
 
         Returns
         -------
-        output : DisplayHandle
-            Output object.
-        """
-        tmp = self._data.copy(deep=True)
-        tmp.columns = self.get_column_names()
+        result : Table
+            The shuffled Table.
 
-        with pd.option_context(
-            "display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]
-        ):
-            return display(tmp)
+        """
+        new_df = self._data.sample(frac=1.0)
+        new_df.columns = self._schema.get_column_names()
+        return Table(new_df)
 
     def slice(
         self,
@@ -933,9 +812,39 @@ def slice(
             raise ValueError("the given index is out of bounds")
 
         new_df = self._data.iloc[start:end:step]
-        new_df.columns = self.schema.get_column_names()
+        new_df.columns = self._schema.get_column_names()
         return Table(new_df)
 
+    def sort_columns(
+        self,
+        query: Callable[[Column, Column], int] = lambda col1, col2: (
+                                                                        col1.name > col2.name
+                                                                    )
+                                                                    - (col1.name < col2.name),
+    ) -> Table:
+        """
+        Sort a table with the given lambda function.
+        If no function is given the columns will be sorted alphabetically.
+        This function uses the default python sort algorithm.
+        The query returns
+        0, if both columns are equal.
+        < 0, if the first column should be ordered before the second column.
+        > 0, if the first column should be ordered after the second column.
+
+        Parameters
+        ----------
+        query : a lambda function
+            The lambda function used to sort the columns.
+
+        Returns
+        -------
+        new_table : Table
+            A new table with sorted columns.
+        """
+        columns = self.to_columns()
+        columns.sort(key=functools.cmp_to_key(query))
+        return Table.from_columns(columns)
+
     def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]:
         """
         Split the table into two new tables.
@@ -959,19 +868,32 @@ def split(self, percentage_in_first: float) -> typing.Tuple[Table, Table]:
             self.slice(round(percentage_in_first * self.count_rows())),
         )
 
-    def shuffle(self) -> Table:
+    def transform_column(
+        self, name: str, transformer: Callable[[Row], typing.Any]
+    ) -> Table:
         """
-        Shuffle the table randomly.
+        Transform provided column by calling provided transformer.
 
         Returns
         -------
         result : Table
-            The shuffled Table.
+            The table with the transformed column.
+
+        Raises
+        ------
+        UnknownColumnNameError
+            If the column does not exist.
 
         """
-        new_df = self._data.sample(frac=1.0)
-        new_df.columns = self.schema.get_column_names()
-        return Table(new_df)
+        if self.has_column(name):
+            items: list = [transformer(item) for item in self.to_rows()]
+            result: Column = Column(pd.Series(items), name)
+            return self.replace_column(name, result)
+        raise UnknownColumnNameError([name])
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Plotting
+    # ------------------------------------------------------------------------------------------------------------------
 
     def correlation_heatmap(self) -> None:
         """
@@ -1015,8 +937,8 @@ def lineplot(self, x_column_name: str, y_column_name: str) -> None:
 
         ax = sns.lineplot(
             data=self._data,
-            x=self.schema._get_column_index_by_name(x_column_name),
-            y=self.schema._get_column_index_by_name(y_column_name),
+            x=self._schema._get_column_index_by_name(x_column_name),
+            y=self._schema._get_column_index_by_name(y_column_name),
         )
         ax.set(xlabel=x_column_name, ylabel=y_column_name)
         ax.set_xticks(ax.get_xticks())
@@ -1049,8 +971,8 @@ def scatterplot(self, x_column_name: str, y_column_name: str) -> None:
 
         ax = sns.scatterplot(
             data=self._data,
-            x=self.schema._get_column_index_by_name(x_column_name),
-            y=self.schema._get_column_index_by_name(y_column_name),
+            x=self._schema._get_column_index_by_name(x_column_name),
+            y=self._schema._get_column_index_by_name(y_column_name),
         )
         ax.set(xlabel=x_column_name, ylabel=y_column_name)
         ax.set_xticks(ax.get_xticks())
@@ -1059,3 +981,130 @@ def scatterplot(self, x_column_name: str, y_column_name: str) -> None:
         )  # rotate the labels of the x Axis to prevent the chance of overlapping of the labels
         plt.tight_layout()
         plt.show()
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Conversion
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def to_csv(self, path_to_file: str) -> None:
+        """
+        Write the data from the table into a CSV file.
+        If the file and/or the directories do not exist they will be created.
+        If the file already exists it will be overwritten.
+
+        Parameters
+        ----------
+        path_to_file : str
+            The path to the output file.
+        """
+        Path(os.path.dirname(path_to_file)).mkdir(parents=True, exist_ok=True)
+        data_to_csv = self._data.copy()
+        data_to_csv.columns = self._schema.get_column_names()
+        data_to_csv.to_csv(path_to_file, index=False)
+
+    def to_json(self, path_to_file: str) -> None:
+        """
+        Write the data from the table into a JSON file.
+        If the file and/or the directories do not exist, they will be created.
+        If the file already exists it will be overwritten.
+
+        Parameters
+        ----------
+        path_to_file : str
+            The path to the output file.
+        """
+        Path(os.path.dirname(path_to_file)).mkdir(parents=True, exist_ok=True)
+        data_to_json = self._data.copy()
+        data_to_json.columns = self._schema.get_column_names()
+        data_to_json.to_json(path_to_file)
+
+    def to_columns(self) -> list[Column]:
+        """
+        Return a list of the columns.
+
+        Returns
+        -------
+        columns : list[Column]
+            List of columns.
+        """
+        return [self.get_column(name) for name in self._schema.get_column_names()]
+
+    def to_rows(self) -> list[Row]:
+        """
+        Return a list of the rows.
+
+        Returns
+        -------
+        rows : list[Row]
+            List of rows.
+        """
+        return [
+            Row(series_row, self._schema) for (_, series_row) in self._data.iterrows()
+        ]
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Other
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def _ipython_display_(self) -> DisplayHandle:
+        """
+        Return a display object for the table to be used in Jupyter Notebooks.
+
+        Returns
+        -------
+        output : DisplayHandle
+            Output object.
+        """
+        tmp = self._data.copy(deep=True)
+        tmp.columns = self.get_column_names()
+
+        with pd.option_context(
+            "display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]
+        ):
+            return display(tmp)
+
+    def list_columns_with_missing_values(self) -> list[Column]:
+        """
+        Return a list of all the columns that have at least one missing value. Returns an empty list if there are none.
+
+        Returns
+        -------
+        columns_with_missing_values: list[Column]
+            The list of columns with missing values.
+        """
+        columns = self.to_columns()
+        columns_with_missing_values = []
+        for column in columns:
+            if column.has_missing_values():
+                columns_with_missing_values.append(column)
+        return columns_with_missing_values
+
+    def list_columns_with_non_numerical_values(self) -> list[Column]:
+        """
+        Return a list of columns only containing non-numerical values.
+
+        Returns
+        -------
+        cols : list[Column]
+            The list with only non-numerical columns.
+        """
+        cols = []
+        for column_name, data_type in self._schema._schema.items():
+            if not data_type.is_numeric():
+                cols.append(self.get_column(column_name))
+        return cols
+
+    def list_columns_with_numerical_values(self) -> list[Column]:
+        """
+        Return a list of columns only containing numerical values.
+
+        Returns
+        -------
+        cols : list[Column]
+            The list with only numerical columns.
+        """
+        cols = []
+        for column_name, data_type in self._schema._schema.items():
+            if data_type.is_numeric():
+                cols.append(self.get_column(column_name))
+        return cols
diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py
index ef4015b02..45862778f 100644
--- a/src/safeds/data/tabular/transformation/_imputer.py
+++ b/src/safeds/data/tabular/transformation/_imputer.py
@@ -120,9 +120,7 @@ def transform(self, table: Table) -> Table:
         data[indices] = pd.DataFrame(
             self._imp.transform(data[indices]), columns=indices
         )
-        table_imputed = Table(data)
-        table_imputed.schema = table.schema
-        return table_imputed
+        return Table(data, table.schema)
 
     def fit_transform(
         self, table: Table, column_names: Optional[list[str]] = None
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py
new file mode 100644
index 000000000..f55d3533e
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py
@@ -0,0 +1,30 @@
+import numpy as np
+import pandas as pd
+from safeds.data.tabular.containers import Table
+from safeds.data.tabular.typing import ColumnType, TableSchema
+
+
+def test_drop_columns_with_non_numerical_values_valid() -> None:
+    table = Table(
+        pd.DataFrame(
+            data={
+                "col1": ["A", "B", "C", "A"],
+                "col2": ["Test1", "Test1", "Test3", "Test1"],
+                "col3": [1, 2, 3, 4],
+                "col4": [2, 3, 1, 4],
+            }
+        )
+    )
+    columns = table.drop_columns_with_non_numerical_values()
+    assert columns[0] == table.get_column("col3")
+    assert columns[1] == table.get_column("col4")
+    assert len(columns) == 2
+
+
+def test_drop_columns_with_non_numerical_values_invalid() -> None:
+    table = Table(
+        [], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
+    )
+    columns = table.drop_columns_with_non_numerical_values()
+    assert columns[0] == table.get_column("col1")
+    assert len(columns) == 1

From 553cf25a80aea9447aea1bddb4129529b2021ccc Mon Sep 17 00:00:00 2001
From: Lars Reimann <mail@larsreimann.com>
Date: Mon, 27 Mar 2023 16:40:04 +0200
Subject: [PATCH 2/3] feat: function to drop columns with non-numerical values
 from `Table`

---
 src/safeds/data/tabular/containers/_table.py        | 12 ++++++++++++
 .../test_drop_columns_with_non_numerical_values.py  | 13 +++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index de05e8fa3..1e1137a5a 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -574,6 +574,18 @@ def drop_columns(self, column_names: list[str]) -> Table:
         )
         return Table(transformed_data)
 
+    def drop_columns_with_non_numerical_values(self) -> Table:
+        """
+        Return a table without the columns that contain non-numerical values.
+
+        Returns
+        -------
+        table : Table
+            A table without the columns that contain non-numerical values.
+
+        """
+        return Table.from_columns(self.list_columns_with_numerical_values())
+
     def drop_duplicate_rows(self) -> Table:
         """
         Return a copy of the table with every duplicate row removed.
diff --git a/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py
index f55d3533e..d847a49d4 100644
--- a/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py
+++ b/tests/safeds/data/tabular/containers/_table/test_drop_columns_with_non_numerical_values.py
@@ -15,16 +15,13 @@ def test_drop_columns_with_non_numerical_values_valid() -> None:
             }
         )
     )
-    columns = table.drop_columns_with_non_numerical_values()
-    assert columns[0] == table.get_column("col3")
-    assert columns[1] == table.get_column("col4")
-    assert len(columns) == 2
+    updated_table = table.drop_columns_with_non_numerical_values()
+    assert updated_table.get_column_names() == ["col3", "col4"]
 
 
-def test_drop_columns_with_non_numerical_values_invalid() -> None:
+def test_drop_columns_with_non_numerical_values_empty() -> None:
     table = Table(
         [], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))})
     )
-    columns = table.drop_columns_with_non_numerical_values()
-    assert columns[0] == table.get_column("col1")
-    assert len(columns) == 1
+    updated_table = table.drop_columns_with_non_numerical_values()
+    assert updated_table.get_column_names() == ["col1"]

From f44376a3a3057c3b910c8393499db497b32ba631 Mon Sep 17 00:00:00 2001
From: lars-reimann <lars-reimann@users.noreply.github.com>
Date: Mon, 27 Mar 2023 14:43:34 +0000
Subject: [PATCH 3/3] style: apply automated linter fixes

---
 src/safeds/data/tabular/containers/_table.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index 1e1137a5a..f34ee6c02 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -12,8 +12,6 @@
 import seaborn as sns
 from IPython.core.display_functions import DisplayHandle, display
 from pandas import DataFrame, Series
-from scipy import stats
-
 from safeds.data.tabular.containers._column import Column
 from safeds.data.tabular.containers._row import Row
 from safeds.data.tabular.typing import ColumnType, TableSchema
@@ -28,6 +26,7 @@
     SchemaMismatchError,
     UnknownColumnNameError,
 )
+from scipy import stats
 
 
 # noinspection PyProtectedMember
@@ -273,7 +272,7 @@ def get_column(self, column_name: str) -> Column:
         if self._schema.has_column(column_name):
             output_column = Column(
                 self._data.iloc[
-                :, [self._schema._get_column_index_by_name(column_name)]
+                    :, [self._schema._get_column_index_by_name(column_name)]
                 ].squeeze(),
                 column_name,
                 self._schema.get_type_of_column(column_name),
@@ -830,9 +829,9 @@ def slice(
     def sort_columns(
         self,
         query: Callable[[Column, Column], int] = lambda col1, col2: (
-                                                                        col1.name > col2.name
-                                                                    )
-                                                                    - (col1.name < col2.name),
+            col1.name > col2.name
+        )
+        - (col1.name < col2.name),
     ) -> Table:
         """
         Sort a table with the given lambda function.