Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5e57ea8
feat: remove old tabular containers
lars-reimann May 10, 2024
4ea08e6
refactor: rename modules
lars-reimann May 10, 2024
09525a8
feat: remove `Experimental` prefixes
lars-reimann May 10, 2024
905a137
fix: imports
lars-reimann May 10, 2024
422d33c
fix: pytest top level collect failures
lars-reimann May 10, 2024
b3bfe33
fix: some failing tests
lars-reimann May 10, 2024
b6be205
feat: raise column name errors where appropriate
lars-reimann May 10, 2024
e519eca
test: fix some failing tests
lars-reimann May 10, 2024
b7bb338
feat: standalone metrics
lars-reimann May 10, 2024
41e0bf2
feat: package for converters and layers
lars-reimann May 10, 2024
73cca5a
perf: much faster classifier metrics
lars-reimann May 10, 2024
80dfb82
refactor: clean up classifier code
lars-reimann May 10, 2024
3d8aae6
feat: redesign classifier interface
lars-reimann May 11, 2024
1a5802a
feat: redesign classifier interface (2)
lars-reimann May 11, 2024
a4233dc
feat: redesign classifier interface (3)
lars-reimann May 11, 2024
a73ccef
feat: redesign regressor interface
lars-reimann May 11, 2024
5acaa84
refactor: extract common logic from classifiers and regressors
lars-reimann May 11, 2024
a07c0d7
docs: better formatting
lars-reimann May 11, 2024
b971647
refactor: also extract common logic for SVMs
lars-reimann May 11, 2024
1c9eff9
refactor: default implementation for two template methods
lars-reimann May 11, 2024
5ff0653
refactor: override `SVM.kernel` in subclasses to declare public retur…
lars-reimann May 11, 2024
9a40b88
feat: better metrics
lars-reimann May 11, 2024
8aaab90
docs: fix copy & paste error
lars-reimann May 11, 2024
76107b2
refactor: move utils into common superclass
lars-reimann May 11, 2024
d39c78f
test: fix some failing tests
lars-reimann May 11, 2024
897e571
test: fix some failing tests
lars-reimann May 11, 2024
788398f
test: fix some failing tests
lars-reimann May 11, 2024
ca35d8e
test: fix some failing tests
lars-reimann May 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
71 changes: 71 additions & 0 deletions benchmarks/metrics/classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from __future__ import annotations

from timeit import timeit
from typing import TYPE_CHECKING

import polars as pl

from benchmarks.table.utils import create_synthetic_table
from safeds.data.tabular.containers import Table
from safeds.ml.metrics import ClassificationMetrics


REPETITIONS = 10


def _run_accuracy() -> None:
    """Benchmark target: compute accuracy on the module-level `table`."""
    predicted = table.get_column("predicted")
    expected = table.get_column("expected")
    ClassificationMetrics.accuracy(predicted, expected)


def _run_f1_score() -> None:
    """Benchmark target: compute the F1 score (positive class 1) on the module-level `table`."""
    predicted = table.get_column("predicted")
    expected = table.get_column("expected")
    ClassificationMetrics.f1_score(predicted, expected, 1)


def _run_precision() -> None:
    """Benchmark target: compute precision (positive class 1) on the module-level `table`."""
    predicted = table.get_column("predicted")
    expected = table.get_column("expected")
    ClassificationMetrics.precision(predicted, expected, 1)


def _run_recall() -> None:
    """Benchmark target: compute recall (positive class 1) on the module-level `table`."""
    predicted = table.get_column("predicted")
    expected = table.get_column("expected")
    ClassificationMetrics.recall(predicted, expected, 1)


if __name__ == "__main__":
# Create a synthetic Table
table = (
create_synthetic_table(10000, 2)
.rename_column("column_0", "predicted")
.rename_column("column_1", "expected")
)

# Run the benchmarks
timings: dict[str, float] = {
"accuracy": timeit(
_run_accuracy,
number=REPETITIONS,
),
"f1_score": timeit(
_run_f1_score,
number=REPETITIONS,
),
"precision": timeit(
_run_precision,
number=REPETITIONS,
),
"recall": timeit(
_run_recall,
number=REPETITIONS,
),
}

# Print the timings
with pl.Config(
tbl_rows=-1,
):
print(
Table(
{
"method": list(timings.keys()),
"timing": list(timings.values()),
}
)
)
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from timeit import timeit

from safeds.data.tabular.containers import ExperimentalTable
from safeds.data.tabular.containers import Table

from benchmarks.table.utils import create_synthetic_table_polars
from benchmarks.table.utils import create_synthetic_table

REPETITIONS = 10

Expand All @@ -21,7 +21,7 @@ def _run_summarize_statistics() -> None:

if __name__ == "__main__":
# Create a synthetic Table
table = create_synthetic_table_polars(100, 5000)
table = create_synthetic_table(100, 5000)

# Run the benchmarks
timings: dict[str, float] = {
Expand All @@ -41,7 +41,7 @@ def _run_summarize_statistics() -> None:

# Print the timings
print(
ExperimentalTable(
Table(
{
"method": list(timings.keys()),
"timing": list(timings.values()),
Expand Down
69 changes: 38 additions & 31 deletions benchmarks/table/row_operations.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,58 @@
from timeit import timeit

import polars as pl

from safeds.data.tabular.containers import Table

from benchmarks.table.utils import create_synthetic_table

REPETITIONS = 10


def _run_group_rows() -> None:
table.group_rows(lambda row: row.get_value("column_0") % 2 == 0)


def _run_remove_duplicate_rows() -> None:
table.remove_duplicate_rows()
table.remove_duplicate_rows()._lazy_frame.collect()


def _run_remove_rows_with_missing_values() -> None:
table.remove_rows_with_missing_values()
table.remove_rows_with_missing_values()._lazy_frame.collect()


def _run_remove_rows_with_outliers() -> None:
table.remove_rows_with_outliers()


def _run_remove_rows() -> None:
table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0)
table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0)._lazy_frame.collect()


def _run_remove_rows_by_column() -> None:
table.remove_rows_by_column("column_0", lambda cell: cell % 2 == 0)._lazy_frame.collect()


def _run_shuffle_rows() -> None:
table.shuffle_rows()
table.shuffle_rows()._lazy_frame.collect()


def _run_slice_rows() -> None:
table.slice_rows(end=table.number_of_rows // 2)
table.slice_rows(length=table.number_of_rows // 2)._lazy_frame.collect()


def _run_sort_rows() -> None:
table.sort_rows(lambda row1, row2: row1.get_value("column_0") - row2.get_value("column_0"))
table.sort_rows(lambda row: row.get_value("column_0"))._lazy_frame.collect()


def _run_split_rows() -> None:
table.split_rows(0.5)
def _run_sort_rows_by_column() -> None:
table.sort_rows_by_column("column_0")._lazy_frame.collect()


def _run_to_rows() -> None:
table.to_rows()
def _run_split_rows() -> None:
table_1, table_2 = table.split_rows(0.5)
table_1._lazy_frame.collect()
table_2._lazy_frame.collect()


def _run_transform_column() -> None:
table.transform_column("column_0", lambda row: row.get_value("column_0") * 2)
table.transform_column("column_0", lambda value: value * 2)._lazy_frame.collect()


if __name__ == "__main__":
Expand All @@ -57,10 +61,6 @@ def _run_transform_column() -> None:

# Run the benchmarks
timings: dict[str, float] = {
"group_rows": timeit(
_run_group_rows,
number=REPETITIONS,
),
"remove_duplicate_rows": timeit(
_run_remove_duplicate_rows,
number=REPETITIONS,
Expand All @@ -77,6 +77,10 @@ def _run_transform_column() -> None:
_run_remove_rows,
number=REPETITIONS,
),
"remove_rows_by_column": timeit(
_run_remove_rows_by_column,
number=REPETITIONS,
),
"shuffle_rows": timeit(
_run_shuffle_rows,
number=REPETITIONS,
Expand All @@ -89,26 +93,29 @@ def _run_transform_column() -> None:
_run_sort_rows,
number=REPETITIONS,
),
"split_rows": timeit(
_run_split_rows,
"sort_rows_by_column": timeit(
_run_sort_rows_by_column,
number=REPETITIONS,
),
"to_rows": timeit(
_run_to_rows,
"split_rows": timeit(
_run_split_rows,
number=REPETITIONS,
),
"transform_colum": timeit(
"transform_column": timeit(
_run_transform_column,
number=REPETITIONS,
),
}

# Print the timings
print(
Table(
{ # noqa: T201
"method": list(timings.keys()),
"timing": list(timings.values()),
}
with pl.Config(
tbl_rows=-1,
):
print(
Table(
{
"method": list(timings.keys()),
"timing": list(timings.values()),
}
)
)
)
121 changes: 0 additions & 121 deletions benchmarks/table/row_operations_polars.py

This file was deleted.

2 changes: 0 additions & 2 deletions benchmarks/table/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from .create_synthetic_table import create_synthetic_table
from .create_synthetic_table_polars import create_synthetic_table_polars

__all__ = [
"create_synthetic_table",
"create_synthetic_table_polars",
]
Loading