diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 4d87d1fae..3bc04dc64 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -786,32 +786,34 @@ def summarize_statistics(self) -> Table: >>> from safeds.data.tabular.containers import Table >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]}) >>> table.summarize_statistics() - metrics a b - 0 maximum 3 4 - 1 minimum 1 2 - 2 mean 2.0 3.0 - 3 mode [1, 3] [2, 4] - 4 median 2.0 3.0 - 5 sum 4 6 - 6 variance 2.0 2.0 - 7 standard deviation 1.4142135623730951 1.4142135623730951 - 8 idness 1.0 1.0 - 9 stability 0.5 0.5 + metric a b + 0 minimum 1 2 + 1 maximum 3 4 + 2 mean 2.0 3.0 + 3 mode [1, 3] [2, 4] + 4 median 2.0 3.0 + 5 variance 2.0 2.0 + 6 standard deviation 1.4142135623730951 1.4142135623730951 + 7 missing value count 0 0 + 8 missing value ratio 0.0 0.0 + 9 idness 1.0 1.0 + 10 stability 0.5 0.5 """ import pandas as pd if self.number_of_columns == 0: return Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -820,22 +822,23 @@ def summarize_statistics(self) -> Table: elif self.number_of_rows == 0: table = Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], }, ) for name in self.column_names: - table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-"])) + table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"])) return table columns = self.to_columns() @@ -844,14 +847,15 @@ def summarize_statistics(self) -> Table: for column in columns: statistics = { - "maximum": column.maximum, "minimum": column.minimum, + "maximum": column.maximum, "mean": column.mean, "mode": column.mode, "median": column.median, - "sum": column.sum, "variance": column.variance, "standard deviation": column.standard_deviation, + "missing value count": column.missing_value_count, + "missing value ratio": column.missing_value_ratio, "idness": column.idness, "stability": column.stability, } @@ -866,7 +870,7 @@ def summarize_statistics(self) -> Table: result = pd.concat([result, pd.DataFrame(values)], axis=1) result = pd.concat([pd.DataFrame(list(statistics.keys())), result], axis=1) - result.columns = ["metrics", *self.column_names] + result.columns = ["metric", *self.column_names] return Table._from_pandas_dataframe(result) diff --git a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py index 54185e6ca..0fe30b2ac 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py +++ b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py @@ -11,27 +11,29 @@ Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], "col1": [ - "2", "1", + "2", str(4.0 / 3), "[1]", "1.0", - "4", str(1.0 / 3), str(stdev([1, 2, 1])), + "0", + "0.0", str(2.0 / 3), str(2.0 / 3), ], @@ -43,7 +45,8 @@ "-", "-", "-", - "-", + "0", + "0.0", "1.0", str(1.0 / 3), ], @@ -54,15 +57,16 @@ Table(), Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -73,15 +77,16 @@ Table({"col": [], "gg": []}), Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -96,6 +101,7 @@ "-", "-", "-", + "-", ], "gg": [ "-", @@ -108,6 +114,7 @@ "-", "-", "-", + "-", ], }, ), @@ -116,25 +123,31 @@ Table({"col": [None, None]}), Table( { - "metrics": [ - "maximum", + "metric": [ "minimum", + "maximum", "mean", "mode", "median", - "sum", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], - "col": ["-", "-", "-", "[]", "-", "-", "-", "-", "0.0", "-"], + "col": ["-", "-", "-", "[]", "-", "-", "-", "2", "1.0", "0.0", "-"], }, ), ), ], - ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"], + ids=[ + "Column of integers and Column of characters", + "empty", + "empty with columns", + "Column of None", + ], ) def test_should_summarize_statistics(table: Table, expected: Table) -> None: - assert expected.schema == table.summarize_statistics().schema - assert expected == table.summarize_statistics() + assert table.summarize_statistics().schema == expected.schema + assert table.summarize_statistics() == expected