Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,9 +688,12 @@ def summarize_statistics(self) -> Table:
self.stability(),
]
else:
min_ = self.min()
max_ = self.max()

values = [
str(self.min() or "-"),
str(self.max() or "-"),
str("-" if min_ is None else min_),
str("-" if max_ is None else max_),
"-",
"-",
"-",
Expand Down Expand Up @@ -1055,7 +1058,8 @@ def stability(self) -> float:
if non_missing.len() == 0:
return 1.0 # All non-null values are the same (since there is are none)

mode_count = non_missing.unique_counts().max()
# `unique_counts` crashes in polars for boolean columns
mode_count = non_missing.value_counts().get_column("count").max()

return mode_count / non_missing.len()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
([None], 1),
([1, 2, 3, 4], 1 / 4),
(["b", "a", "abc", "abc", "abc"], 3 / 5),
([True, False, True, True, None], 3 / 4),
],
ids=[
"empty",
"some missing values",
"only missing values",
"numeric",
"non-numeric",
"boolean", # caused a crash in previous implementation
],
)
def test_should_return_stability_of_column(values: list[Any], expected: float) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,36 @@
@pytest.mark.parametrize(
("column", "expected"),
[
(
( # boolean
Column("col", [True, False, True]),
Table(
{
"metric": [
"min",
"max",
"mean",
"median",
"standard deviation",
"distinct value count",
"idness",
"missing value ratio",
"stability",
],
"col": [
"False",
"True",
"-",
"-",
"-",
"2",
str(2 / 3),
"0.0",
str(2 / 3),
],
},
),
),
( # ints
Column("col", [1, 2, 1]),
Table(
{
Expand All @@ -25,18 +54,18 @@
"col": [
1,
2,
4.0 / 3,
1.0,
4 / 3,
1,
stdev([1, 2, 1]),
2,
2.0 / 3,
0.0,
2.0 / 3,
2 / 3,
0,
2 / 3,
],
},
),
),
(
( # strings
Column("col", ["a", "b", "c"]),
Table(
{
Expand Down Expand Up @@ -65,7 +94,7 @@
},
),
),
(
( # only missing
Column("col", [None, None]),
Table(
{
Expand Down Expand Up @@ -96,9 +125,10 @@
),
],
ids=[
"Column of ints",
"Column of strings",
"Column of None",
"boolean",
"ints",
"strings",
"only missing",
],
)
def test_should_summarize_statistics(column: Column, expected: Table) -> None:
Expand Down