From d46ec0eac483761698e18a052d8fef821a4de99c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 1 May 2024 18:12:10 +0200 Subject: [PATCH 1/4] feat: remove `sum` --- src/safeds/data/tabular/containers/_table.py | 14 +++++-------- .../_table/test_summarize_statistics.py | 21 ++++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 4d87d1fae..8ae174d2b 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -792,11 +792,10 @@ def summarize_statistics(self) -> Table: 2 mean 2.0 3.0 3 mode [1, 3] [2, 4] 4 median 2.0 3.0 - 5 sum 4 6 - 6 variance 2.0 2.0 - 7 standard deviation 1.4142135623730951 1.4142135623730951 - 8 idness 1.0 1.0 - 9 stability 0.5 0.5 + 5 variance 2.0 2.0 + 6 standard deviation 1.4142135623730951 1.4142135623730951 + 7 idness 1.0 1.0 + 8 stability 0.5 0.5 """ import pandas as pd @@ -809,7 +808,6 @@ def summarize_statistics(self) -> Table: "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", @@ -826,7 +824,6 @@ def summarize_statistics(self) -> Table: "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", @@ -835,7 +832,7 @@ def summarize_statistics(self) -> Table: }, ) for name in self.column_names: - table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-"])) + table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-"])) return table columns = self.to_columns() @@ -849,7 +846,6 @@ def summarize_statistics(self) -> Table: "mean": column.mean, "mode": column.mode, "median": column.median, - "sum": column.sum, "variance": column.variance, "standard deviation": column.standard_deviation, "idness": column.idness, diff --git a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py index 54185e6ca..18a9866e9 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py +++ b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py @@ -17,7 +17,6 @@ "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", @@ -29,7 +28,6 @@ str(4.0 / 3), "[1]", "1.0", - "4", str(1.0 / 3), str(stdev([1, 2, 1])), str(2.0 / 3), @@ -43,7 +41,6 @@ "-", "-", "-", - "-", "1.0", str(1.0 / 3), ], @@ -60,7 +57,6 @@ "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", @@ -79,7 +75,6 @@ "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", @@ -95,7 +90,6 @@ "-", "-", "-", - "-", ], "gg": [ "-", @@ -107,7 +101,6 @@ "-", "-", "-", - "-", ], }, ), @@ -122,19 +115,23 @@ "mean", "mode", "median", - "sum", "variance", "standard deviation", "idness", "stability", ], - "col": ["-", "-", "-", "[]", "-", "-", "-", "-", "0.0", "-"], + "col": ["-", "-", "-", "[]", "-", "-", "-", "0.0", "-"], }, ), ), ], - ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"], + ids=[ + "Column of integers and Column of characters", + "empty", + "empty with columns", + "Column of None", + ], ) def test_should_summarize_statistics(table: Table, expected: Table) -> None: - assert expected.schema == table.summarize_statistics().schema - assert expected == table.summarize_statistics() + assert table.summarize_statistics().schema == expected.schema + assert table.summarize_statistics() == expected From 82096049fd45f8c6ffde4b1af317b4c6b56918b4 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 1 May 2024 18:17:13 +0200 Subject: [PATCH 2/4] feat: swap `minimum` and `maximum` --- src/safeds/data/tabular/containers/_table.py | 10 +++++----- .../containers/_table/test_summarize_statistics.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 8ae174d2b..805370772 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -787,8 +787,8 @@ def summarize_statistics(self) -> Table: >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]}) >>> table.summarize_statistics() metrics a b - 0 maximum 3 4 - 1 minimum 1 2 + 0 minimum 1 2 + 1 maximum 3 4 2 mean 2.0 3.0 3 mode [1, 3] [2, 4] 4 median 2.0 3.0 @@ -803,8 +803,8 @@ def summarize_statistics(self) -> Table: return Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", @@ -819,8 +819,8 @@ def summarize_statistics(self) -> Table: table = Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", @@ -841,8 +841,8 @@ def summarize_statistics(self) -> Table: for column in columns: statistics = { - "maximum": column.maximum, "minimum": column.minimum, + "maximum": column.maximum, "mean": column.mean, "mode": column.mode, "median": column.median, diff --git a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py index 18a9866e9..7fc83207b 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py +++ b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py @@ -12,8 +12,8 @@ Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", @@ -23,8 +23,8 @@ "stability", ], "col1": [ - "2", "1", + "2", str(4.0 / 3), "[1]", "1.0", @@ -52,8 +52,8 @@ Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", @@ -70,8 +70,8 @@ Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", @@ -110,8 +110,8 @@ Table( { "metrics": [ - "maximum", "minimum", + "maximum", "mean", "mode", "median", From 5bdd9f93dcb12eb80b87f2c127b0a1817a1e34df Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 1 May 2024 18:31:05 +0200 Subject: [PATCH 3/4] feat: show `missing value count` and `missing value ratio` --- src/safeds/data/tabular/containers/_table.py | 36 +++++++++++-------- .../_table/test_summarize_statistics.py | 26 +++++++++++--- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 805370772..d3a6ae529 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -786,23 +786,25 @@ def summarize_statistics(self) -> Table: >>> from safeds.data.tabular.containers import Table >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]}) >>> table.summarize_statistics() - metrics a b - 0 minimum 1 2 - 1 maximum 3 4 - 2 mean 2.0 3.0 - 3 mode [1, 3] [2, 4] - 4 median 2.0 3.0 - 5 variance 2.0 2.0 - 6 standard deviation 1.4142135623730951 1.4142135623730951 - 7 idness 1.0 1.0 - 8 stability 0.5 0.5 + metric a b + 0 minimum 1 2 + 1 maximum 3 4 + 2 mean 2.0 3.0 + 3 mode [1, 3] [2, 4] + 4 median 2.0 3.0 + 5 variance 2.0 2.0 + 6 standard deviation 1.4142135623730951 1.4142135623730951 + 7 missing value count 0 0 + 8 missing value ratio 0.0 0.0 + 9 idness 1.0 1.0 + 10 stability 0.5 0.5 """ import pandas as pd if self.number_of_columns == 0: return Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -810,6 +812,8 @@ def summarize_statistics(self) -> Table: "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -818,7 +822,7 @@ def summarize_statistics(self) -> Table: elif self.number_of_rows == 0: table = Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -826,13 +830,15 @@ def summarize_statistics(self) -> Table: "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], }, ) for name in self.column_names: - table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-"])) + table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-","-","-", "-", "-", "-"])) return table columns = self.to_columns() @@ -848,6 +854,8 @@ def summarize_statistics(self) -> Table: "median": column.median, "variance": column.variance, "standard deviation": column.standard_deviation, + "missing value count": column.missing_value_count, + "missing value ratio": column.missing_value_ratio, "idness": column.idness, "stability": column.stability, } @@ -862,7 +870,7 @@ def summarize_statistics(self) -> Table: result = pd.concat([result, pd.DataFrame(values)], axis=1) result = pd.concat([pd.DataFrame(list(statistics.keys())), result], axis=1) - result.columns = ["metrics", *self.column_names] + result.columns = ["metric", *self.column_names] return Table._from_pandas_dataframe(result) diff --git a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py index 7fc83207b..0fe30b2ac 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py +++ b/tests/safeds/data/tabular/containers/_table/test_summarize_statistics.py @@ -11,7 +11,7 @@ Table({"col1": [1, 2, 1], "col2": ["a", "b", "c"]}), Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -19,6 +19,8 @@ "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -30,6 +32,8 @@ "1.0", str(1.0 / 3), str(stdev([1, 2, 1])), + "0", + "0.0", str(2.0 / 3), str(2.0 / 3), ], @@ -41,6 +45,8 @@ "-", "-", "-", + "0", + "0.0", "1.0", str(1.0 / 3), ], @@ -51,7 +57,7 @@ Table(), Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -59,6 +65,8 @@ "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -69,7 +77,7 @@ Table({"col": [], "gg": []}), Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -77,6 +85,8 @@ "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], @@ -90,6 +100,8 @@ "-", "-", "-", + "-", + "-", ], "gg": [ "-", @@ -101,6 +113,8 @@ "-", "-", "-", + "-", + "-", ], }, ), @@ -109,7 +123,7 @@ Table({"col": [None, None]}), Table( { - "metrics": [ + "metric": [ "minimum", "maximum", "mean", @@ -117,10 +131,12 @@ "median", "variance", "standard deviation", + "missing value count", + "missing value ratio", "idness", "stability", ], - "col": ["-", "-", "-", "[]", "-", "-", "-", "0.0", "-"], + "col": ["-", "-", "-", "[]", "-", "-", "-", "2", "1.0", "0.0", "-"], }, ), ), From c4536ca176776b20150e767d7fcd3b98a311e7c2 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 1 May 2024 16:33:31 +0000 Subject: [PATCH 4/4] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index d3a6ae529..3bc04dc64 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -838,7 +838,7 @@ def summarize_statistics(self) -> Table: }, ) for name in self.column_names: - table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-","-","-", "-", "-", "-"])) + table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"])) return table columns = self.to_columns()