From 439c899c39544b6f343c7061e095dbda49484f18 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 6 May 2024 19:13:50 +0200 Subject: [PATCH 1/4] feat: polars implementation of a row --- .../containers/_experimental_polars_row.py | 110 +++++++++++++++++- .../containers/_experimental_polars_table.py | 20 ++-- .../_experimental_vectorized_cell.py | 10 ++ .../_experimental_vectorized_row.py | 105 +++++++++++++++++ 4 files changed, 235 insertions(+), 10 deletions(-) create mode 100644 src/safeds/data/tabular/containers/_experimental_vectorized_cell.py create mode 100644 src/safeds/data/tabular/containers/_experimental_vectorized_row.py diff --git a/src/safeds/data/tabular/containers/_experimental_polars_row.py b/src/safeds/data/tabular/containers/_experimental_polars_row.py index c6b43b06c..ed3be4ec4 100644 --- a/src/safeds/data/tabular/containers/_experimental_polars_row.py +++ b/src/safeds/data/tabular/containers/_experimental_polars_row.py @@ -1,7 +1,111 @@ from __future__ import annotations -from abc import ABC +from abc import ABC, abstractmethod +from typing import Any +from collections.abc import Mapping, Iterator +from safeds.data.tabular.containers import ExperimentalPolarsCell +from safeds.data.tabular.typing import Schema, ColumnType -class ExperimentalPolarsRow(ABC): # noqa: B024 - pass + +class ExperimentalPolarsRow(ABC, Mapping[str, Any]): + """A row is a one-dimensional collection of named, heterogeneous values.""" + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __contains__(self, name: Any) -> bool: + return self.has_column(name) + + @abstractmethod + def __eq__(self, other: object) -> bool: + ... + + def __getitem__(self, name: str) -> ExperimentalPolarsCell: + return self.get_value(name) + + @abstractmethod + def __hash__(self) -> int: + ... + + def __iter__(self) -> Iterator[Any]: + return iter(self.column_names) + + def __len__(self) -> int: + return self.number_of_columns + + @abstractmethod + def __sizeof__(self) -> int: + ... + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + @abstractmethod + def column_names(self) -> list[str]: + """The names of the columns in the row.""" + + @property + @abstractmethod + def number_of_columns(self) -> int: + """The number of columns in the row.""" + + @property + @abstractmethod + def schema(self) -> Schema: # TODO: rethink return type + """The schema of the row.""" + + # ------------------------------------------------------------------------------------------------------------------ + # Column operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def get_value(self, name: str) -> ExperimentalPolarsCell: + """ + Get the value of the specified column. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + value: + The value of the column. + """ + + @abstractmethod + def get_column_type(self, name: str) -> ColumnType: # TODO: rethink return type + """ + Get the type of the specified column. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + type: + The type of the column. + """ + + @abstractmethod + def has_column(self, name: str) -> bool: + """ + Check if the row has a column with the specified name. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + has_column: + Whether the row has a column with the specified name. + """ diff --git a/src/safeds/data/tabular/containers/_experimental_polars_table.py b/src/safeds/data/tabular/containers/_experimental_polars_table.py index cfff17824..d64b4a6f7 100644 --- a/src/safeds/data/tabular/containers/_experimental_polars_table.py +++ b/src/safeds/data/tabular/containers/_experimental_polars_table.py @@ -28,12 +28,13 @@ class ExperimentalPolarsTable: To create a `Table` call the constructor or use one of the following static methods: - | Method | Description | - | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------- | - | [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. | - | [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. | - | [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. | - | [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. | + | Method | Description | + | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------- | + | [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. | + | [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. | + | [from_parquet_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_parquet_file] | Create a table from a Parquet file. | + | [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. | + | [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. | Parameters ---------- @@ -212,7 +213,7 @@ def __str__(self) -> str: @property def column_names(self) -> list[str]: """ - Names of the columns in the table. + The names of the columns in the table. Examples -------- @@ -354,6 +355,11 @@ def remove_duplicate_rows(self) -> ExperimentalPolarsTable: """ Remove duplicate rows from the table. + Returns + ------- + filtered_table: + The table without duplicate rows. + Examples -------- >>> from safeds.data.tabular.containers import ExperimentalPolarsTable diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py b/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py new file mode 100644 index 000000000..d71453f37 --- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py @@ -0,0 +1,10 @@ +from safeds.data.tabular.containers import ExperimentalPolarsCell, ExperimentalPolarsColumn + + +class _VectorizedCell(ExperimentalPolarsCell): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, column: ExperimentalPolarsColumn): + self._column: ExperimentalPolarsColumn = column diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py new file mode 100644 index 000000000..7473a32e6 --- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +from safeds.data.tabular.containers import ExperimentalPolarsRow, ExperimentalPolarsTable +from safeds.data.tabular.containers._vectorized_cell import _VectorizedCell +from safeds.data.tabular.typing import Schema, ColumnType + + +class _VectorizedRow(ExperimentalPolarsRow): + """ + A row is a one-dimensional collection of named, heterogeneous values. + + This implementation treats an entire table as a row, where each column is a "cell" in the row. This greatly speeds + up operations on the row. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, table: ExperimentalPolarsTable): + self._table: ExperimentalPolarsTable = table + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _VectorizedRow): + return NotImplemented + if self is other: + return True + return self._table == other._table + + def __hash__(self) -> int: + return self._table.__hash__() + + def __sizeof__(self) -> int: + return self._table.__sizeof__() + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def column_names(self) -> list[str]: + """The names of the columns in the row.""" + return self._table.column_names + + @property + def number_of_columns(self) -> int: + """The number of columns in the row.""" + return self._table.number_of_columns + + @property + def schema(self) -> Schema: # TODO: rethink return type + """The schema of the row.""" + return self._table.schema + + # ------------------------------------------------------------------------------------------------------------------ + # Column operations + # ------------------------------------------------------------------------------------------------------------------ + + def get_value(self, name: str) -> _VectorizedCell: + """ + Get the value of the specified column. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + value: + The value of the column. + """ + return _VectorizedCell(self._table.get_column(name)) + + def get_column_type(self, name: str) -> ColumnType: # TODO: rethink return type + """ + Get the type of the specified column. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + type: + The type of the column. + """ + return self._table.get_column_type(name) + + def has_column(self, name: str) -> bool: + """ + Check if the row has a column with the specified name. + + Parameters + ---------- + name: + The name of the column. + + Returns + ------- + has_column: + Whether the row has a column with the specified name. + """ + return self._table.has_column(name) From 4d89ea397eebbe21ca3ef2ce32a5f23690bb3656 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 6 May 2024 17:19:57 +0000 Subject: [PATCH 2/4] style: apply automated linter fixes --- .../data/tabular/containers/_experimental_polars_row.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/safeds/data/tabular/containers/_experimental_polars_row.py b/src/safeds/data/tabular/containers/_experimental_polars_row.py index ed3be4ec4..de78eab70 100644 --- a/src/safeds/data/tabular/containers/_experimental_polars_row.py +++ b/src/safeds/data/tabular/containers/_experimental_polars_row.py @@ -19,15 +19,13 @@ def __contains__(self, name: Any) -> bool: return self.has_column(name) @abstractmethod - def __eq__(self, other: object) -> bool: - ... + def __eq__(self, other: object) -> bool: ... def __getitem__(self, name: str) -> ExperimentalPolarsCell: return self.get_value(name) @abstractmethod - def __hash__(self) -> int: - ... + def __hash__(self) -> int: ... def __iter__(self) -> Iterator[Any]: return iter(self.column_names) @@ -36,8 +34,7 @@ def __len__(self) -> int: return self.number_of_columns @abstractmethod - def __sizeof__(self) -> int: - ... + def __sizeof__(self) -> int: ... # ------------------------------------------------------------------------------------------------------------------ # Properties From 8f51382990eda9a4e32b0eb29a281670d6bbdd76 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 6 May 2024 19:20:53 +0200 Subject: [PATCH 3/4] fix: import --- .../data/tabular/containers/_experimental_vectorized_row.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py index 7473a32e6..9aae96752 100644 --- a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py +++ b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py @@ -1,7 +1,7 @@ from __future__ import annotations from safeds.data.tabular.containers import ExperimentalPolarsRow, ExperimentalPolarsTable -from safeds.data.tabular.containers._vectorized_cell import _VectorizedCell +from safeds.data.tabular.containers._experimental_vectorized_cell import _VectorizedCell from safeds.data.tabular.typing import Schema, ColumnType From bc3be1f60b08ef90415f6e954aee6aca1a3b08bb Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 6 May 2024 19:27:26 +0200 Subject: [PATCH 4/4] fix: use relative imports --- .../tabular/containers/_experimental_polars_row.py | 10 ++++++---- .../tabular/containers/_experimental_polars_table.py | 3 ++- .../containers/_experimental_vectorized_cell.py | 3 ++- .../containers/_experimental_vectorized_row.py | 12 +++++++++--- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/safeds/data/tabular/containers/_experimental_polars_row.py b/src/safeds/data/tabular/containers/_experimental_polars_row.py index de78eab70..1edc0b961 100644 --- a/src/safeds/data/tabular/containers/_experimental_polars_row.py +++ b/src/safeds/data/tabular/containers/_experimental_polars_row.py @@ -1,11 +1,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any -from collections.abc import Mapping, Iterator +from collections.abc import Iterator, Mapping +from typing import TYPE_CHECKING, Any -from safeds.data.tabular.containers import ExperimentalPolarsCell -from safeds.data.tabular.typing import Schema, ColumnType +if TYPE_CHECKING: + from safeds.data.tabular.typing import ColumnType, Schema + + from ._experimental_polars_cell import ExperimentalPolarsCell class ExperimentalPolarsRow(ABC, Mapping[str, Any]): diff --git a/src/safeds/data/tabular/containers/_experimental_polars_table.py b/src/safeds/data/tabular/containers/_experimental_polars_table.py index d64b4a6f7..1375b3b95 100644 --- a/src/safeds/data/tabular/containers/_experimental_polars_table.py +++ b/src/safeds/data/tabular/containers/_experimental_polars_table.py @@ -3,9 +3,10 @@ from typing import TYPE_CHECKING, Any, Literal from safeds._utils import _check_and_normalize_file_path -from safeds.data.tabular.containers import Table from safeds.exceptions import ColumnLengthMismatchError +from ._table import Table + if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence from pathlib import Path diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py b/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py index d71453f37..0a9708d25 100644 --- a/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py +++ b/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py @@ -1,4 +1,5 @@ -from safeds.data.tabular.containers import ExperimentalPolarsCell, ExperimentalPolarsColumn +from ._experimental_polars_cell import ExperimentalPolarsCell +from ._experimental_polars_column import ExperimentalPolarsColumn class _VectorizedCell(ExperimentalPolarsCell): diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py index 9aae96752..001f7fa71 100644 --- a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py +++ b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py @@ -1,8 +1,14 @@ from __future__ import annotations -from safeds.data.tabular.containers import ExperimentalPolarsRow, ExperimentalPolarsTable -from safeds.data.tabular.containers._experimental_vectorized_cell import _VectorizedCell -from safeds.data.tabular.typing import Schema, ColumnType +from typing import TYPE_CHECKING + +from ._experimental_polars_row import ExperimentalPolarsRow +from ._experimental_vectorized_cell import _VectorizedCell + +if TYPE_CHECKING: + from safeds.data.tabular.typing import ColumnType, Schema + + from ._experimental_polars_table import ExperimentalPolarsTable class _VectorizedRow(ExperimentalPolarsRow):