Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 8 additions & 24 deletions benchmarks/table/row_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,10 @@
REPETITIONS = 10


def _run_add_rows() -> None:
    """Benchmark appending the table's own rows back onto itself."""
    _ = table.add_rows(table)


def _run_get_row() -> None:
    """Benchmark fetching a single row by index (always the first row)."""
    _ = table.get_row(0)


def _run_group_rows() -> None:
    """Benchmark grouping rows by the parity of column_0."""

    def _has_even_key(row) -> bool:
        # Same predicate the benchmark has always used: even values of column_0.
        return row.get_value("column_0") % 2 == 0

    table.group_rows(_has_even_key)


def _run_keep_only_rows() -> None:
    """Benchmark filtering the table down to rows with an even column_0."""

    def _has_even_key(row) -> bool:
        return row.get_value("column_0") % 2 == 0

    table.keep_only_rows(_has_even_key)


def _run_remove_duplicate_rows() -> None:
    """Benchmark dropping duplicate rows from the table."""
    _ = table.remove_duplicate_rows()

Expand Down Expand Up @@ -59,28 +47,20 @@ def _run_to_rows() -> None:
table.to_rows()


def _run_transform_column() -> None:
    """Benchmark doubling every value of column_0 via a row-wise transform."""

    def _double(row):
        return row.get_value("column_0") * 2

    table.transform_column("column_0", _double)


if __name__ == "__main__":
# Create a synthetic Table
table = create_synthetic_table(1000, 50)

# Run the benchmarks
timings: dict[str, float] = {
"add_rows": timeit(
_run_add_rows,
number=REPETITIONS,
),
"get_row": timeit(
_run_get_row,
number=REPETITIONS,
),
"group_rows": timeit(
_run_group_rows,
number=REPETITIONS,
),
"keep_only_rows": timeit(
_run_keep_only_rows,
number=REPETITIONS,
),
"remove_duplicate_rows": timeit(
_run_remove_duplicate_rows,
number=REPETITIONS,
Expand Down Expand Up @@ -117,6 +97,10 @@ def _run_to_rows() -> None:
_run_to_rows,
number=REPETITIONS,
),
"transform_colum": timeit(
_run_transform_column,
number=REPETITIONS,
),
}

# Print the timings
Expand Down
140 changes: 59 additions & 81 deletions benchmarks/table/row_operations_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,6 @@
REPETITIONS = 10


# def _run_add_rows() -> None:
# table.add_rows(table)
#
#
# def _run_get_row() -> None:
# table.get_row(0)
#
#
# def _run_group_rows() -> None:
# table.group_rows(lambda row: row.get_value("column_0") % 2 == 0)
#
#
# def _run_keep_only_rows() -> None:
# table.keep_only_rows(lambda row: row.get_value("column_0") % 2 == 0)


def _run_remove_duplicate_rows() -> None:
    """Benchmark deduplication; collect() forces the lazy frame to evaluate."""
    deduplicated = table.remove_duplicate_rows()
    deduplicated._lazy_frame.collect()

Expand All @@ -33,30 +17,36 @@ def _run_remove_rows_with_missing_values() -> None:

# def _run_remove_rows_with_outliers() -> None:
# table.remove_rows_with_outliers()
#
#
# def _run_remove_rows() -> None:
# table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0)
#
#
# def _run_shuffle_rows() -> None:
# table.shuffle_rows()
#
#
# def _run_slice_rows() -> None:
# table.slice_rows(end=table.number_of_rows // 2)
#
#
# def _run_sort_rows() -> None:
# table.sort_rows(lambda row1, row2: row1.get_value("column_0") - row2.get_value("column_0"))
#
#
# def _run_split_rows() -> None:
# table.split_rows(0.5)
#
#
# def _run_to_rows() -> None:
# table.to_rows()


def _run_remove_rows() -> None:
    """Benchmark removing rows with an even column_0; collect() forces evaluation."""

    def _has_even_key(row) -> bool:
        return row.get_value("column_0") % 2 == 0

    remaining = table.remove_rows(_has_even_key)
    remaining._lazy_frame.collect()


def _run_shuffle_rows() -> None:
    """Benchmark shuffling the rows; collect() forces the lazy frame to evaluate."""
    shuffled = table.shuffle_rows()
    shuffled._lazy_frame.collect()


def _run_slice_rows() -> None:
    """Benchmark slicing off the first half of the rows; collect() forces evaluation."""
    half_row_count = table.number_of_rows // 2
    table.slice_rows(length=half_row_count)._lazy_frame.collect()


def _run_sort_rows() -> None:
    """Benchmark sorting by a row-wise key; collect() forces evaluation."""

    def _key(row):
        # Sort key: the value of column_0 for each row.
        return row.get_value("column_0")

    table.sort_rows(_key)._lazy_frame.collect()


def _run_sort_rows_by_column() -> None:
    """Benchmark sorting by column name; collect() forces evaluation."""
    ordered = table.sort_rows_by_column("column_0")
    ordered._lazy_frame.collect()


def _run_split_rows() -> None:
    """Benchmark a 50/50 row split; collect() forces both halves to evaluate."""
    for half in table.split_rows(0.5):
        half._lazy_frame.collect()


def _run_transform_column() -> None:
    """Benchmark doubling column_0 element-wise; collect() forces evaluation."""

    def _double(value):
        return value * 2

    table.transform_column("column_0", _double)._lazy_frame.collect()


if __name__ == "__main__":
Expand All @@ -65,22 +55,6 @@ def _run_remove_rows_with_missing_values() -> None:

# Run the benchmarks
timings: dict[str, float] = {
# "add_rows": timeit(
# _run_add_rows,
# number=REPETITIONS,
# ),
# "get_row": timeit(
# _run_get_row,
# number=REPETITIONS,
# ),
# "group_rows": timeit(
# _run_group_rows,
# number=REPETITIONS,
# ),
# "keep_only_rows": timeit(
# _run_keep_only_rows,
# number=REPETITIONS,
# ),
"remove_duplicate_rows": timeit(
_run_remove_duplicate_rows,
number=REPETITIONS,
Expand All @@ -93,36 +67,40 @@ def _run_remove_rows_with_missing_values() -> None:
# _run_remove_rows_with_outliers,
# number=REPETITIONS,
# ),
# "remove_rows": timeit(
# _run_remove_rows,
# number=REPETITIONS,
# ),
# "shuffle_rows": timeit(
# _run_shuffle_rows,
# number=REPETITIONS,
# ),
# "slice_rows": timeit(
# _run_slice_rows,
# number=REPETITIONS,
# ),
# "sort_rows": timeit(
# _run_sort_rows,
# number=REPETITIONS,
# ),
# "split_rows": timeit(
# _run_split_rows,
# number=REPETITIONS,
# ),
# "to_rows": timeit(
# _run_to_rows,
# number=REPETITIONS,
# ),
"remove_rows": timeit(
_run_remove_rows,
number=REPETITIONS,
),
"shuffle_rows": timeit(
_run_shuffle_rows,
number=REPETITIONS,
),
"slice_rows": timeit(
_run_slice_rows,
number=REPETITIONS,
),
"sort_rows": timeit(
_run_sort_rows,
number=REPETITIONS,
),
"sort_rows_by_column": timeit(
_run_sort_rows_by_column,
number=REPETITIONS,
),
"split_rows": timeit(
_run_split_rows,
number=REPETITIONS,
),
"transform_column": timeit(
_run_transform_column,
number=REPETITIONS,
),
}

# Print the timings
print(
Table(
{ # noqa: T201
{
"method": list(timings.keys()),
"timing": list(timings.values()),
}
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/table/utils/create_synthetic_table_polars.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from random import randrange

from safeds.data.tabular.containers import ExperimentalPolarsTable
from safeds.data.tabular.containers import ExperimentalTable


def create_synthetic_table_polars(
Expand All @@ -9,7 +9,7 @@ def create_synthetic_table_polars(
*,
min_value: int = 0,
max_value: int = 1000,
) -> ExperimentalPolarsTable:
) -> ExperimentalTable:
"""Create a synthetic Table with random numerical data.

Parameters
Expand All @@ -28,7 +28,7 @@ def create_synthetic_table_polars(
Table
A Table with random numerical data.
"""
return ExperimentalPolarsTable(
return ExperimentalTable(
{
f"column_{i}": [randrange(min_value, max_value) for _ in range(number_of_rows)]
for i in range(number_of_columns)
Expand Down
20 changes: 11 additions & 9 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ matplotlib = "^3.6.3"
openpyxl = "^3.1.2"
pandas = "^2.0.0"
pillow = ">=9.5,<11.0"
polars = {extras = ["pyarrow"], version = "^0.20.23"}
polars = {extras = ["numpy", "pyarrow"], version = "^0.20.24"}
scikit-learn = "^1.2.0"
seaborn = "^0.13.0"
statsmodels = "^0.14.1"
Expand Down
Binary file modified src/resources/to_excel_file.xlsx
Binary file not shown.
24 changes: 12 additions & 12 deletions src/safeds/data/tabular/containers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,32 @@

if TYPE_CHECKING:
from ._column import Column
from ._experimental_polars_cell import ExperimentalPolarsCell
from ._experimental_polars_column import ExperimentalPolarsColumn
from ._experimental_polars_row import ExperimentalPolarsRow
from ._experimental_polars_table import ExperimentalPolarsTable
from ._experimental_cell import ExperimentalCell
from ._experimental_column import ExperimentalColumn
from ._experimental_row import ExperimentalRow
from ._experimental_table import ExperimentalTable
from ._row import Row
from ._table import Table

apipkg.initpkg(
__name__,
{
"Column": "._column:Column",
"ExperimentalPolarsCell": "._experimental_polars_cell:ExperimentalPolarsCell",
"ExperimentalPolarsColumn": "._experimental_polars_column:ExperimentalPolarsColumn",
"ExperimentalPolarsRow": "._experimental_polars_row:ExperimentalPolarsRow",
"ExperimentalPolarsTable": "._experimental_polars_table:ExperimentalPolarsTable",
"ExperimentalCell": "._experimental_cell:ExperimentalCell",
"ExperimentalColumn": "._experimental_column:ExperimentalColumn",
"ExperimentalRow": "._experimental_row:ExperimentalRow",
"ExperimentalTable": "._experimental_table:ExperimentalTable",
"Row": "._row:Row",
"Table": "._table:Table",
},
)

__all__ = [
"Column",
"ExperimentalPolarsCell",
"ExperimentalPolarsColumn",
"ExperimentalPolarsRow",
"ExperimentalPolarsTable",
"ExperimentalCell",
"ExperimentalColumn",
"ExperimentalRow",
"ExperimentalTable",
"Row",
"Table",
]
Loading