diff --git a/src/resources/from_json_file.json b/src/resources/from_json_file.json index 5965ef149..5cd814134 100644 --- a/src/resources/from_json_file.json +++ b/src/resources/from_json_file.json @@ -1 +1,6 @@ -{ "a": { "0": 1, "1": 2, "2": 3 }, "b": { "0": 4, "1": 5, "2": 6 } } +{ + "columns": [ + { "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, + { "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } + ] +} diff --git a/src/resources/from_json_file_2.json b/src/resources/from_json_file_2.json deleted file mode 100644 index 5cd814134..000000000 --- a/src/resources/from_json_file_2.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "columns": [ - { "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, - { "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } - ] -} diff --git a/src/resources/to_json_file.json b/src/resources/to_json_file.json index 5965ef149..5cd814134 100644 --- a/src/resources/to_json_file.json +++ b/src/resources/to_json_file.json @@ -1 +1,6 @@ -{ "a": { "0": 1, "1": 2, "2": 3 }, "b": { "0": 4, "1": 5, "2": 6 } } +{ + "columns": [ + { "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, + { "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } + ] +} diff --git a/src/resources/to_json_file_2.json b/src/resources/to_json_file_2.json deleted file mode 100644 index 5cd814134..000000000 --- a/src/resources/to_json_file_2.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "columns": [ - { "name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3] }, - { "name": "b", "datatype": "Int64", "bit_settings": "", "values": [4, 5, 6] } - ] -} diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index 5512b0b1e..35815eff3 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -8,6 +8,7 @@ from ._cell import 
Cell from ._column import Column from ._row import Row + from ._string_cell import StringCell from ._table import Table apipkg.initpkg( @@ -16,6 +17,7 @@ "Cell": "._cell:Cell", "Column": "._column:Column", "Row": "._row:Row", + "StringCell": "._string_cell:StringCell", "Table": "._table:Table", }, ) @@ -24,5 +26,6 @@ "Cell", "Column", "Row", + "StringCell", "Table", ] diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index b0c54f580..0201736b9 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -6,8 +6,10 @@ if TYPE_CHECKING: import polars as pl + from ._string_cell import StringCell + T_co = TypeVar("T_co", covariant=True) -P = TypeVar("P") +P_contra = TypeVar("P_contra", contravariant=True) R_co = TypeVar("R_co", covariant=True) @@ -109,10 +111,10 @@ def __mul__(self, other: Any) -> Cell[R_co]: ... def __rmul__(self, other: Any) -> Cell[R_co]: ... @abstractmethod - def __pow__(self, other: float | Cell[P]) -> Cell[R_co]: ... + def __pow__(self, other: float | Cell[P_contra]) -> Cell[R_co]: ... @abstractmethod - def __rpow__(self, other: float | Cell[P]) -> Cell[R_co]: ... + def __rpow__(self, other: float | Cell[P_contra]) -> Cell[R_co]: ... @abstractmethod def __sub__(self, other: Any) -> Cell[R_co]: ... @@ -134,6 +136,15 @@ def __hash__(self) -> int: ... @abstractmethod def __sizeof__(self) -> int: ... 
+ # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + @abstractmethod + def str(self) -> StringCell: + """Namespace for operations on strings.""" + # ------------------------------------------------------------------------------------------------------------------ # Boolean operations # ------------------------------------------------------------------------------------------------------------------ @@ -372,6 +383,36 @@ def add(self, other: Any) -> Cell[R_co]: """ return self.__add__(other) + def div(self, other: Any) -> Cell[R_co]: + """ + Divide by a value. This is equivalent to the `/` operator. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", [6, 8]) + >>> column.transform(lambda cell: cell.div(2)) + +---------+ + | example | + | --- | + | f64 | + +=========+ + | 3.00000 | + | 4.00000 | + +---------+ + + >>> column.transform(lambda cell: cell / 2) + +---------+ + | example | + | --- | + | f64 | + +=========+ + | 3.00000 | + | 4.00000 | + +---------+ + """ + return self.__truediv__(other) + def mod(self, other: Any) -> Cell[R_co]: """ Perform a modulo operation. This is equivalent to the `%` operator. @@ -432,7 +473,7 @@ def mul(self, other: Any) -> Cell[R_co]: """ return self.__mul__(other) - def pow(self, other: float | Cell[P]) -> Cell[R_co]: + def pow(self, other: float | Cell[P_contra]) -> Cell[R_co]: """ Raise to a power. This is equivalent to the `**` operator. @@ -492,36 +533,6 @@ def sub(self, other: Any) -> Cell[R_co]: """ return self.__sub__(other) - def div(self, other: Any) -> Cell[R_co]: - """ - Divide by a value. This is equivalent to the `/` operator. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [6, 8]) - >>> column.transform(lambda cell: cell.div(2)) - +---------+ - | example | - | --- | - | f64 | - +=========+ - | 3.00000 | - | 4.00000 | - +---------+ - - >>> column.transform(lambda cell: cell / 2) - +---------+ - | example | - | --- | - | f64 | - +=========+ - | 3.00000 | - | 4.00000 | - +---------+ - """ - return self.__truediv__(other) - # ------------------------------------------------------------------------------------------------------------------ # Comparison operations # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 9a013cbf7..f047a8741 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -756,8 +756,8 @@ def correlation_with(self, other: Column) -> float: >>> column1.correlation_with(column2) 1.0 - >>> column4 = Column("test", [3, 2, 1]) - >>> column1.correlation_with(column4) + >>> column3 = Column("test", [3, 2, 1]) + >>> column1.correlation_with(column3) -1.0 """ import polars as pl diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index 4e66678e5..0eaf9ab11 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -9,6 +9,8 @@ if TYPE_CHECKING: import polars as pl + from ._string_cell import StringCell + T = TypeVar("T") P = TypeVar("P") R = TypeVar("R") @@ -31,7 +33,9 @@ def __init__(self, expression: pl.Expr) -> None: # "Boolean" operators (actually bitwise) ----------------------------------- def __invert__(self) -> Cell[bool]: - return _wrap(self._expression.__invert__()) + import polars as pl + + return _wrap(self._expression.cast(pl.Boolean).__invert__()) def 
__and__(self, other: bool | Cell[bool]) -> Cell[bool]: return _wrap(self._expression.__and__(other)) @@ -83,10 +87,16 @@ def __abs__(self) -> Cell[R]: return _wrap(self._expression.__abs__()) def __ceil__(self) -> Cell[R]: - return _wrap(self._expression.ceil()) + import polars as pl + + # polars does not yet implement ceil for integers + return _wrap(self._expression.cast(pl.Float64).ceil()) def __floor__(self) -> Cell[R]: - return _wrap(self._expression.floor()) + import polars as pl + + # polars does not yet implement floor for integers + return _wrap(self._expression.cast(pl.Float64).floor()) def __neg__(self) -> Cell[R]: return _wrap(self._expression.__neg__()) @@ -166,6 +176,16 @@ def __hash__(self) -> int: def __sizeof__(self) -> int: return self._expression.__sizeof__() + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def str(self) -> StringCell: + from ._lazy_string_cell import _LazyStringCell # circular import + + return _LazyStringCell(self._expression) + # ------------------------------------------------------------------------------------------------------------------ # Internal # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/containers/_lazy_string_cell.py b/src/safeds/data/tabular/containers/_lazy_string_cell.py new file mode 100644 index 000000000..20f80e0fc --- /dev/null +++ b/src/safeds/data/tabular/containers/_lazy_string_cell.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds._validation import _check_bounds, _ClosedBound + +from ._lazy_cell import _LazyCell +from ._string_cell import StringCell + +if TYPE_CHECKING: + import
datetime + + import polars as pl + + from ._cell import Cell + + +class _LazyStringCell(StringCell): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + # ------------------------------------------------------------------------------------------------------------------ + # String operations + # ------------------------------------------------------------------------------------------------------------------ + + def contains(self, substring: str) -> Cell[bool]: + return _LazyCell(self._expression.str.contains(substring, literal=True)) + + def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: + if optimize_for_ascii: + return _LazyCell(self._expression.str.len_bytes()) + else: + return _LazyCell(self._expression.str.len_chars()) + + def ends_with(self, suffix: str) -> Cell[bool]: + return _LazyCell(self._expression.str.ends_with(suffix)) + + def index_of(self, substring: str) -> Cell[int | None]: + return _LazyCell(self._expression.str.find(substring, literal=True)) + + def replace(self, old: str, new: str) -> Cell[str]: + return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) + + def starts_with(self, prefix: str) -> Cell[bool]: + return _LazyCell(self._expression.str.starts_with(prefix)) + + def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + + return _LazyCell(self._expression.str.slice(start, length)) + + def to_date(self) -> Cell[datetime.date | None]: + return
_LazyCell(self._expression.str.to_date(format="%F", strict=False)) + + def to_datetime(self) -> Cell[datetime.datetime | None]: + return _LazyCell(self._expression.str.to_datetime(format="%+", strict=False)) + + def to_int(self, *, base: int = 10) -> Cell[int | None]: + return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) + + def to_float(self) -> Cell[float | None]: + import polars as pl + + return _LazyCell(self._expression.cast(pl.Float64, strict=False)) + + def to_lowercase(self) -> Cell[str]: + return _LazyCell(self._expression.str.to_lowercase()) + + def to_uppercase(self) -> Cell[str]: + return _LazyCell(self._expression.str.to_uppercase()) + + def trim(self) -> Cell[str]: + return _LazyCell(self._expression.str.strip_chars()) + + def trim_end(self) -> Cell[str]: + return _LazyCell(self._expression.str.strip_chars_end()) + + def trim_start(self) -> Cell[str]: + return _LazyCell(self._expression.str.strip_chars_start()) + + # ------------------------------------------------------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------------------------------------------------------ + + def _equals(self, other: object) -> bool: + if not isinstance(other, _LazyStringCell): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression.meta) diff --git a/src/safeds/data/tabular/containers/_string_cell.py b/src/safeds/data/tabular/containers/_string_cell.py new file mode 100644 index 000000000..e47a41444 --- /dev/null +++ b/src/safeds/data/tabular/containers/_string_cell.py @@ -0,0 +1,505 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import datetime + + from safeds.data.tabular.containers import Cell + + +class StringCell(ABC): + """ + Namespace for operations on strings. + + This class cannot be instantiated directly. 
It can only be accessed using the `str` attribute of a cell. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.transform(lambda cell: cell.str.to_uppercase()) + +---------+ + | example | + | --- | + | str | + +=========+ + | AB | + | BC | + | CD | + +---------+ + """ + + @abstractmethod + def contains(self, substring: str) -> Cell[bool]: + """ + Check if the string value in the cell contains the substring. + + Parameters + ---------- + substring: + The substring to search for. + + Returns + ------- + contains: + Whether the string value contains the substring. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.count_if(lambda cell: cell.str.contains("b")) + 2 + """ + + @abstractmethod + def ends_with(self, suffix: str) -> Cell[bool]: + """ + Check if the string value in the cell ends with the suffix. + + Parameters + ---------- + suffix: + The suffix to search for. + + Returns + ------- + ends_with: + Whether the string value ends with the suffix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.count_if(lambda cell: cell.str.ends_with("c")) + 1 + """ + + @abstractmethod + def index_of(self, substring: str) -> Cell[int | None]: + """ + Get the index of the first occurrence of the substring in the string value in the cell. + + Parameters + ---------- + substring: + The substring to search for. + + Returns + ------- + index_of: + The index of the first occurrence of the substring. If the substring is not found, None is returned. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.transform(lambda cell: cell.str.index_of("b")) + +---------+ + | example | + | --- | + | u32 | + +=========+ + | 1 | + | 0 | + | null | + +---------+ + """ + + @abstractmethod + def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: + """ + Get the number of characters of the string value in the cell. + + Parameters + ---------- + optimize_for_ascii: + Greatly speed up this operation if the string is ASCII-only. If the string contains non-ASCII characters, + this option will return incorrect results, though. + + Returns + ------- + length: + The length of the string value. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["", "a", "abc"]) + >>> column.transform(lambda cell: cell.str.length()) + +---------+ + | example | + | --- | + | u32 | + +=========+ + | 0 | + | 1 | + | 3 | + +---------+ + """ + + @abstractmethod + def replace(self, old: str, new: str) -> Cell[str]: + """ + Replace occurrences of the old substring with the new substring in the string value in the cell. + + Parameters + ---------- + old: + The substring to replace. + new: + The substring to replace with. + + Returns + ------- + replaced_string: + The string value with the occurrences replaced. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.transform(lambda cell: cell.str.replace("b", "z")) + +---------+ + | example | + | --- | + | str | + +=========+ + | az | + | zc | + | cd | + +---------+ + """ + + @abstractmethod + def starts_with(self, prefix: str) -> Cell[bool]: + """ + Check if the string value in the cell starts with the prefix. + + Parameters + ---------- + prefix: + The prefix to search for. 
+ + Returns + ------- + starts_with: + Whether the string value starts with the prefix. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.count_if(lambda cell: cell.str.starts_with("a")) + 1 + """ + + @abstractmethod + def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: + """ + Get a substring of the string value in the cell. + + Parameters + ---------- + start: + The start index of the substring. + length: + The length of the substring. If None, the slice contains all characters starting from `start`. Must be + greater than or equal to 0. + + Returns + ------- + substring: + The substring of the string value. + + Raises + ------ + OutOfBoundsError + If length is less than 0. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["abc", "def", "ghi"]) + >>> column.transform(lambda cell: cell.str.substring(1, 2)) + +---------+ + | example | + | --- | + | str | + +=========+ + | bc | + | ef | + | hi | + +---------+ + """ + + @abstractmethod + def to_date(self) -> Cell[datetime.date | None]: + """ + Convert the string value in the cell to a date. Requires the string to be in the ISO 8601 format. + + Returns + ------- + date: + The date value. If the string cannot be converted to a date, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["2021-01-01", "2021-02-01", "abc"]) + >>> column.transform(lambda cell: cell.str.to_date()) + +------------+ + | example | + | --- | + | date | + +============+ + | 2021-01-01 | + | 2021-02-01 | + | null | + +------------+ + """ + + @abstractmethod + def to_datetime(self) -> Cell[datetime.datetime | None]: + """ + Convert the string value in the cell to a datetime. Requires the string to be in the ISO 8601 format. + + Returns + ------- + datetime: + The datetime value.
If the string cannot be converted to a datetime, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc"]) + >>> column.transform(lambda cell: cell.str.to_datetime()) + +-------------------------+ + | example | + | --- | + | datetime[μs, UTC] | + +=========================+ + | 2021-01-01 00:00:00 UTC | + | 2021-02-01 00:00:00 UTC | + | null | + +-------------------------+ + """ + + @abstractmethod + def to_float(self) -> Cell[float | None]: + """ + Convert the string value in the cell to a float. + + Returns + ------- + float: + The float value. If the string cannot be converted to a float, None is returned. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["1", "3.4", "5.6", "abc"]) + >>> column.transform(lambda cell: cell.str.to_float()) + +---------+ + | example | + | --- | + | f64 | + +=========+ + | 1.00000 | + | 3.40000 | + | 5.60000 | + | null | + +---------+ + """ + + @abstractmethod + def to_int(self, *, base: int = 10) -> Cell[int | None]: + """ + Convert the string value in the cell to an integer. + + Parameters + ---------- + base: + The base of the integer. + + Returns + ------- + int: + The integer value. If the string cannot be converted to an integer, None is returned. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("example", ["1", "2", "3", "abc"]) + >>> column1.transform(lambda cell: cell.str.to_int()) + +---------+ + | example | + | --- | + | i64 | + +=========+ + | 1 | + | 2 | + | 3 | + | null | + +---------+ + + >>> column2 = Column("example", ["1", "10", "11", "abc"]) + >>> column2.transform(lambda cell: cell.str.to_int(base=2)) + +---------+ + | example | + | --- | + | i64 | + +=========+ + | 1 | + | 2 | + | 3 | + | null | + +---------+ + """ + + @abstractmethod + def to_lowercase(self) -> Cell[str]: + """ + Convert the string value in the cell to lowercase. + + Returns + ------- + lowercase: + The string value in lowercase. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["AB", "BC", "CD"]) + >>> column.transform(lambda cell: cell.str.to_lowercase()) + +---------+ + | example | + | --- | + | str | + +=========+ + | ab | + | bc | + | cd | + +---------+ + """ + + @abstractmethod + def to_uppercase(self) -> Cell[str]: + """ + Convert the string value in the cell to uppercase. + + Returns + ------- + uppercase: + The string value in uppercase. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column.transform(lambda cell: cell.str.to_uppercase()) + +---------+ + | example | + | --- | + | str | + +=========+ + | AB | + | BC | + | CD | + +---------+ + """ + + @abstractmethod + def trim(self) -> Cell[str]: + """ + Remove whitespace from the start and end of the string value in the cell. + + Returns + ------- + trimmed: + The string value without whitespace at the start and end. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column.transform(lambda cell: cell.str.trim()) + +---------+ + | example | + | --- | + | str | + +=========+ + | | + | abc | + | abc | + | abc | + +---------+ + """ + + @abstractmethod + def trim_end(self) -> Cell[str]: + """ + Remove whitespace from the end of the string value in the cell. + + Returns + ------- + trimmed: + The string value without whitespace at the end. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column.transform(lambda cell: cell.str.trim_end()) + +---------+ + | example | + | --- | + | str | + +=========+ + | | + | abc | + | abc | + | abc | + +---------+ + """ + + @abstractmethod + def trim_start(self) -> Cell[str]: + """ + Remove whitespace from the start of the string value in the cell. + + Returns + ------- + trimmed: + The string value without whitespace at the start. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column.transform(lambda cell: cell.str.trim_start()) + +---------+ + | example | + | --- | + | str | + +=========+ + | | + | abc | + | abc | + | abc | + +---------+ + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def _equals(self, other: object) -> bool: + """ + Check if this cell is equal to another object. + + This method is needed because the `__eq__` method is used for element-wise comparisons. 
+ """ diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 476791915..80f7971de 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -225,7 +225,7 @@ def from_json_file(path: str | Path) -> Table: Examples -------- >>> from safeds.data.tabular.containers import Table - >>> Table.from_json_file("./src/resources/from_json_file_2.json") + >>> Table.from_json_file("./src/resources/from_json_file.json") +-----+-----+ | a | b | | --- | --- | @@ -1883,7 +1883,7 @@ def to_json_file( -------- >>> from safeds.data.tabular.containers import Table >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) - >>> table.to_json_file("./src/resources/to_json_file_2.json") + >>> table.to_json_file("./src/resources/to_json_file.json") """ path = _normalize_and_check_file_path(path, ".json", [".json"]) path.parent.mkdir(parents=True, exist_ok=True) diff --git a/src/safeds/data/tabular/plotting/_column_plotter.py b/src/safeds/data/tabular/plotting/_column_plotter.py index e9692c7da..f47dc8003 100644 --- a/src/safeds/data/tabular/plotting/_column_plotter.py +++ b/src/safeds/data/tabular/plotting/_column_plotter.py @@ -35,7 +35,7 @@ def box_plot(self) -> Image: Returns ------- - box_plot: + plot: The box plot as an image. Raises @@ -78,7 +78,7 @@ def histogram(self, *, max_bin_count: int = 10) -> Image: Returns ------- - histogram: + plot: The plot as an image. Examples @@ -100,7 +100,7 @@ def lag_plot(self, lag: int) -> Image: Returns ------- - lag_plot: + plot: The plot as an image. 
Raises diff --git a/src/safeds/data/tabular/plotting/_table_plotter.py b/src/safeds/data/tabular/plotting/_table_plotter.py index 81bd3f83e..5526dda48 100644 --- a/src/safeds/data/tabular/plotting/_table_plotter.py +++ b/src/safeds/data/tabular/plotting/_table_plotter.py @@ -210,7 +210,7 @@ def line_plot(self, x_name: str, y_name: str) -> Image: Returns ------- - line_plot: + plot: The plot as an image. Raises @@ -296,7 +296,7 @@ def scatter_plot(self, x_name: str, y_name: str) -> Image: Returns ------- - scatter_plot: + plot: The plot as an image. Raises diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index fa7f2e09b..c95d17b02 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -20,7 +20,7 @@ class LabelEncoder(InvertibleTableTransformer): ---------- partial_order: The partial order of the labels. The labels are encoded in the order of the given list. Additional values are - encoded as the next integer after the last value in the list in the order they appear in the data. + assigned labels in the order they are encountered during fitting. 
""" # ------------------------------------------------------------------------------------------------------------------ @@ -47,6 +47,15 @@ def __hash__(self) -> int: # Leave out the internal state for faster hashing ) + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def partial_order(self) -> list[Any]: + """The partial order of the labels.""" + return list(self._partial_order) # defensive copy + # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 25d6a82d9..ca488d8f8 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -99,6 +99,15 @@ def __hash__(self) -> int: self._mapping, ) + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def separator(self) -> str: + """The separator used to separate the original column name from the value in the new column names.""" + return self._separator + # ------------------------------------------------------------------------------------------------------------------ # Learning and transformation # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/ml/classical/regression/_regressor.py 
b/src/safeds/ml/classical/regression/_regressor.py index 4efd9c894..ce08bd506 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -23,6 +23,8 @@ def summarize_metrics(self, validation_or_test_set: Table | TabularDataset) -> T """ Summarize the regressor's metrics on the given data. + **Note:** The model must be fitted. + Parameters ---------- validation_or_test_set: @@ -65,7 +67,10 @@ def coefficient_of_determination(self, validation_or_test_set: Table | TabularDa | 0.0 | The model is as good as predicting the mean of the target values. Try something else. | | (-∞, 0.0) | The model is worse than predicting the mean of the target values. Something is very wrong. | - **Note:** Some other libraries call this metric `r2_score`. + **Notes:** + + - The model must be fitted. + - Some other libraries call this metric `r2_score`. Parameters ---------- @@ -100,6 +105,8 @@ def mean_absolute_error(self, validation_or_test_set: Table | TabularDataset) -> values. The **lower** the mean absolute error, the better the regressor. Results range from 0.0 to positive infinity. + **Note:** The model must be fitted. + Parameters ---------- validation_or_test_set: @@ -137,6 +144,8 @@ def mean_directional_accuracy(self, validation_or_test_set: Table | TabularDatas This metric is useful for time series data, where the order of the target values has a meaning. It is not useful for other types of data. Because of this, it is not included in the `summarize_metrics` method. + **Note:** The model must be fitted. + Parameters ---------- validation_or_test_set: @@ -170,7 +179,10 @@ def mean_squared_error(self, validation_or_test_set: Table | TabularDataset) -> values. The **lower** the mean squared error, the better the regressor. Results range from 0.0 to positive infinity. - **Note:** To get the root mean squared error (RMSE), take the square root of the result. + **Notes:** + + - The model must be fitted.
+ - To get the root mean squared error (RMSE), take the square root of the result. Parameters ---------- @@ -205,6 +217,8 @@ def median_absolute_deviation(self, validation_or_test_set: Table | TabularDatas target values. The **lower** the median absolute deviation, the better the regressor. Results range from 0.0 to positive infinity. + **Note:** The model must be fitted. + Parameters ---------- validation_or_test_set: diff --git a/src/safeds/ml/metrics/_classification_metrics.py b/src/safeds/ml/metrics/_classification_metrics.py index 94b1efe72..97ffbf7a5 100644 --- a/src/safeds/ml/metrics/_classification_metrics.py +++ b/src/safeds/ml/metrics/_classification_metrics.py @@ -1,5 +1,6 @@ from __future__ import annotations +from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any from safeds.data.labeled.containers import TabularDataset @@ -10,9 +11,12 @@ from safeds.data.tabular.containers import Column -class ClassificationMetrics: +class ClassificationMetrics(ABC): """A collection of classification metrics.""" + @abstractmethod + def __init__(self) -> None: ... + @staticmethod def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> Table: """ diff --git a/src/safeds/ml/metrics/_regression_metrics.py b/src/safeds/ml/metrics/_regression_metrics.py index df75940bc..79cb2a872 100644 --- a/src/safeds/ml/metrics/_regression_metrics.py +++ b/src/safeds/ml/metrics/_regression_metrics.py @@ -1,13 +1,18 @@ from __future__ import annotations +from abc import ABC, abstractmethod + from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Column, Table from safeds.exceptions import ColumnLengthMismatchError -class RegressionMetrics: +class RegressionMetrics(ABC): """A collection of regression metrics.""" + @abstractmethod + def __init__(self) -> None: ... 
+ @staticmethod def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> Table: """ diff --git a/src/safeds/ml/nn/layers/_convolutional2d_layer.py b/src/safeds/ml/nn/layers/_convolutional2d_layer.py index 70b717487..8c507c933 100644 --- a/src/safeds/ml/nn/layers/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/layers/_convolutional2d_layer.py @@ -82,21 +82,22 @@ def forward(self, x: Tensor) -> Tensor: class Convolutional2DLayer(Layer): - def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0): - """ - Create a Convolutional 2D Layer. + """ + A convolutional 2D Layer. + + Parameters + ---------- + output_channel: + the amount of output channels + kernel_size: + the size of the kernel + stride: + the stride of the convolution + padding: + the padding of the convolution + """ - Parameters - ---------- - output_channel: - the amount of output channels - kernel_size: - the size of the kernel - stride: - the stride of the convolution - padding: - the padding of the convolution - """ + def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0): self._output_channel = output_channel self._kernel_size = kernel_size self._stride = stride @@ -246,6 +247,23 @@ def __sizeof__(self) -> int: class ConvolutionalTranspose2DLayer(Convolutional2DLayer): + """ + A convolutional transpose 2D Layer. + + Parameters + ---------- + output_channel: + the amount of output channels + kernel_size: + the size of the kernel + stride: + the stride of the transposed convolution + padding: + the padding of the transposed convolution + output_padding: + the output padding of the transposed convolution + """ + def __init__( self, output_channel: int, @@ -255,22 +273,6 @@ def __init__( padding: int = 0, output_padding: int = 0, ): - """ - Create a Convolutional Transpose 2D Layer. 
- - Parameters - ---------- - output_channel: - the amount of output channels - kernel_size: - the size of the kernel - stride: - the stride of the transposed convolution - padding: - the padding of the transposed convolution - output_padding: - the output padding of the transposed convolution - """ super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding diff --git a/src/safeds/ml/nn/layers/_flatten_layer.py b/src/safeds/ml/nn/layers/_flatten_layer.py index 5ac58e318..17f72388f 100644 --- a/src/safeds/ml/nn/layers/_flatten_layer.py +++ b/src/safeds/ml/nn/layers/_flatten_layer.py @@ -31,8 +31,9 @@ def forward(self, x: Tensor) -> Tensor: class FlattenLayer(Layer): + """A flatten layer.""" + def __init__(self) -> None: - """Create a Flatten Layer.""" self._input_size: ImageSize | None = None self._output_size: int | None = None diff --git a/src/safeds/ml/nn/layers/_forward_layer.py b/src/safeds/ml/nn/layers/_forward_layer.py index 745178cfa..741668b69 100644 --- a/src/safeds/ml/nn/layers/_forward_layer.py +++ b/src/safeds/ml/nn/layers/_forward_layer.py @@ -13,52 +13,25 @@ from torch import Tensor, nn -def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = nn.Linear(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - case "none": - self._fn = None - case _: - raise ValueError("Unknown Activation Function: " + activation_function) - - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) if self._fn is not None else self._layer(x) - - return _InternalLayer(input_size, output_size, activation_function) - - class 
ForwardLayer(Layer): - def __init__(self, output_size: int, input_size: int | None = None): - """ - Create a Feed Forward Layer. - - Parameters - ---------- - input_size: - The number of neurons in the previous layer - output_size: - The number of neurons in this layer + """ + A forward layer. + + Parameters + ---------- + output_size: + The number of neurons in this layer + input_size: + The number of neurons in the previous layer + + Raises + ------ + OutOfBoundsError + If input_size < 1 + If output_size < 1 + """ - Raises - ------ - OutOfBoundsError - If input_size < 1 - If output_size < 1 - - """ + def __init__(self, output_size: int, input_size: int | None = None): if input_size is not None: self._set_input_size(input_size=input_size) @@ -145,3 +118,30 @@ def __sizeof__(self) -> int: import sys return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size) + + +def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: + from torch import nn + + _init_default_device() + + class _InternalLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + self._layer = nn.Linear(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case "none": + self._fn = None + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) if self._fn is not None else self._layer(x) + + return _InternalLayer(input_size, output_size, activation_function) diff --git a/src/safeds/ml/nn/layers/_lstm_layer.py b/src/safeds/ml/nn/layers/_lstm_layer.py index db74f36a9..0e15149cb 100644 --- a/src/safeds/ml/nn/layers/_lstm_layer.py +++ b/src/safeds/ml/nn/layers/_lstm_layer.py @@ -14,51 +14,25 @@ from torch import Tensor, nn -def 
_create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, activation_function: str): - super().__init__() - self._layer = nn.LSTM(input_size, output_size) - match activation_function: - case "sigmoid": - self._fn = nn.Sigmoid() - case "relu": - self._fn = nn.ReLU() - case "softmax": - self._fn = nn.Softmax() - case "none": - self._fn = None - case _: - raise ValueError("Unknown Activation Function: " + activation_function) - - def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)[0]) if self._fn is not None else self._layer(x)[0] - - return _InternalLayer(input_size, output_size, activation_function) - - class LSTMLayer(Layer): - def __init__(self, output_size: int, input_size: int | None = None): - """ - Create a LSTM Layer. + """ + A long short-term memory (LSTM) layer. + + Parameters + ---------- + output_size: + The number of neurons in this layer + input_size: + The number of neurons in the previous layer + + Raises + ------ + OutOfBoundsError + If input_size < 1 + If output_size < 1 + """ - Parameters - ---------- - input_size: - The number of neurons in the previous layer - output_size: - The number of neurons in this layer - - Raises - ------ - OutOfBoundsError - If input_size < 1 - If output_size < 1 - """ + def __init__(self, output_size: int, input_size: int | None = None): if input_size is not None: self._set_input_size(input_size=input_size) @@ -149,3 +123,30 @@ def __sizeof__(self) -> int: Size of this object in bytes. 
""" return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size) + + +def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: + from torch import nn + + _init_default_device() + + class _InternalLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, activation_function: str): + super().__init__() + self._layer = nn.LSTM(input_size, output_size) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case "none": + self._fn = None + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)[0]) if self._fn is not None else self._layer(x)[0] + + return _InternalLayer(input_size, output_size, activation_function) diff --git a/src/safeds/ml/nn/layers/_pooling2d_layer.py b/src/safeds/ml/nn/layers/_pooling2d_layer.py index ffd6c2f9d..1e2170b14 100644 --- a/src/safeds/ml/nn/layers/_pooling2d_layer.py +++ b/src/safeds/ml/nn/layers/_pooling2d_layer.py @@ -14,42 +14,23 @@ from torch import Tensor, nn -def _create_internal_model(strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int) -> nn.Module: - from torch import nn - - _init_default_device() - - class _InternalLayer(nn.Module): - def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int): - super().__init__() - match strategy: - case "max": - self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - case "avg": - self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - - def forward(self, x: Tensor) -> Tensor: - return self._layer(x) - - return _InternalLayer(strategy, kernel_size, padding, stride) - - class _Pooling2DLayer(Layer): - def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: int = -1, padding: 
int = 0): - """ - Create a Pooling 2D Layer. + """ + A pooling 2D Layer. + + Parameters + ---------- + strategy: + the strategy of the pooling + kernel_size: + the size of the kernel + stride: + the stride of the pooling + padding: + the padding of the pooling + """ - Parameters - ---------- - strategy: - the strategy of the pooling - kernel_size: - the size of the kernel - stride: - the stride of the pooling - padding: - the padding of the pooling - """ + def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: int = -1, padding: int = 0): self._strategy = strategy self._kernel_size = kernel_size self._stride = stride if stride != -1 else kernel_size @@ -177,34 +158,56 @@ def __sizeof__(self) -> int: class MaxPooling2DLayer(_Pooling2DLayer): - def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: - """ - Create a maximum Pooling 2D Layer. + """ + A maximum Pooling 2D Layer. + + Parameters + ---------- + kernel_size: + the size of the kernel + stride: + the stride of the pooling + padding: + the padding of the pooling + """ - Parameters - ---------- - kernel_size: - the size of the kernel - stride: - the stride of the pooling - padding: - the padding of the pooling - """ + def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: super().__init__("max", kernel_size, stride=stride, padding=padding) class AveragePooling2DLayer(_Pooling2DLayer): - def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: - """ - Create a average Pooling 2D Layer. + """ + An average pooling 2D Layer. 
+ + Parameters + ---------- + kernel_size: + the size of the kernel + stride: + the stride of the pooling + padding: + the padding of the pooling + """ - Parameters - ---------- - kernel_size: - the size of the kernel - stride: - the stride of the pooling - padding: - the padding of the pooling - """ + def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: super().__init__("avg", kernel_size, stride=stride, padding=padding) + + +def _create_internal_model(strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int) -> nn.Module: + from torch import nn + + _init_default_device() + + class _InternalLayer(nn.Module): + def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int): + super().__init__() + match strategy: + case "max": + self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + case "avg": + self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + + def forward(self, x: Tensor) -> Tensor: + return self._layer(x) + + return _InternalLayer(strategy, kernel_size, padding, stride) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 20a9ac339..1215fa494 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -1,4 +1,5 @@ from ._assertions import ( + assert_cell_operation_works, assert_tables_equal, assert_that_tabular_datasets_are_equal, ) @@ -36,6 +37,7 @@ from ._resources import resolve_resource_path __all__ = [ + "assert_cell_operation_works", "assert_tables_equal", "assert_that_tabular_datasets_are_equal", "configure_test_with_device", diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 8976fc063..4c4847c6e 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -1,6 +1,9 @@ +from collections.abc import Callable +from typing import Any + from polars.testing import assert_frame_equal from safeds.data.labeled.containers import 
TabularDataset -from safeds.data.tabular.containers import Table +from safeds.data.tabular.containers import Cell, Column, Table def assert_tables_equal(table1: Table, table2: Table) -> None: @@ -9,9 +12,9 @@ def assert_tables_equal(table1: Table, table2: Table) -> None: Parameters ---------- - table1: Table + table1: The first table. - table2: Table + table2: The table to compare the first table to. """ assert_frame_equal(table1._data_frame, table2._data_frame) @@ -23,12 +26,34 @@ def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: Tabul Parameters ---------- - table1: TabularDataset + table1: The first table. - table2: TabularDataset + table2: The table to compare the first table to. """ assert table1._table.schema == table2._table.schema assert table1.features == table2.features assert table1.target == table2.target assert table1 == table2 + + +def assert_cell_operation_works( + input_value: Any, + transformer: Callable[[Cell], Cell], + expected_value: Any, +) -> None: + """ + Assert that a cell operation works as expected. + + Parameters + ---------- + input_value: + The value in the input cell. + transformer: + The transformer to apply to the cells. + expected_value: + The expected value of the transformed cell. 
+ """ + column = Column("A", [input_value]) + transformed_column = column.transform(transformer) + assert transformed_column == Column("A", [expected_value]), f"Expected: {expected_value}\nGot: {transformed_column}" diff --git a/tests/safeds/data/tabular/containers/_cell/__init__.py b/tests/safeds/data/tabular/containers/_cell/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/containers/_cell/test_abs.py b/tests/safeds/data/tabular/containers/_cell/test_abs.py new file mode 100644 index 000000000..a01749e32 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_abs.py @@ -0,0 +1,30 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0.0), + (10, 10), + (10.5, 10.5), + (-10, 10), + (-10.5, 10.5), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + ], +) +class TestShouldReturnAbsoluteValueOfCell: + def test_dunder_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: abs(cell), expected) + + def test_named_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: cell.abs(), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_add.py b/tests/safeds/data/tabular/containers/_cell/test_add.py new file mode 100644 index 000000000..5773ae9d3 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_add.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, 6), + (3, 1.5, 4.5), + (1.5, 3, 4.5), + (1.5, 1.5, 3.0), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeAddition: + def test_dunder_method(self, value1: float, value2: float, expected: float) 
-> None: + assert_cell_operation_works(value1, lambda cell: cell + value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 + cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.add(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_and.py b/tests/safeds/data/tabular/containers/_cell/test_and.py new file mode 100644 index 000000000..cbae19fb5 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_and.py @@ -0,0 +1,39 @@ +from typing import Any + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (False, False, False), + (False, True, False), + (True, False, False), + (True, True, True), + (0, False, False), + (0, True, False), + (1, False, False), + (1, True, True), + ], + ids=[ + "false - false", + "false - true", + "true - false", + "true - true", + "falsy int - false", + "falsy int - true", + "truthy int - false", + "truthy int - true", + ], +) +class TestShouldComputeConjunction: + def test_dunder_method(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell & value2, expected) + + def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 & cell, expected) + + def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.and_(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_ceil.py b/tests/safeds/data/tabular/containers/_cell/test_ceil.py new file mode 100644 index 000000000..b1cd6bed0 --- /dev/null +++ 
b/tests/safeds/data/tabular/containers/_cell/test_ceil.py @@ -0,0 +1,32 @@ +import math + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 11), + (-10, -10), + (-10.5, -10), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + ], +) +class TestShouldReturnCeilOfCell: + def test_dunder_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected) + + def test_named_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: cell.ceil(), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_div.py b/tests/safeds/data/tabular/containers/_cell/test_div.py new file mode 100644 index 000000000..e2ad00380 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_div.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, 1), + (3, 1.5, 2.0), + (1.5, 3, 0.5), + (1.5, 1.5, 1.0), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeDivision: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell / value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 / cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.div(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_eq.py b/tests/safeds/data/tabular/containers/_cell/test_eq.py new file mode 
100644 index 000000000..9347e1456 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_eq.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, True), + (3, 1.5, False), + (1.5, 3, False), + (1.5, 1.5, True), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeEquality: + def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell == value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 == cell, expected) # type: ignore[arg-type,return-value] + + def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.eq(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_equals.py b/tests/safeds/data/tabular/containers/_cell/test_equals.py new file mode 100644 index 000000000..3a859c08a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_equals.py @@ -0,0 +1,41 @@ +from typing import Any + +import polars as pl +import pytest +from safeds.data.tabular.containers import Cell, Table +from safeds.data.tabular.containers._lazy_cell import _LazyCell + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyCell(pl.col("a")), _LazyCell(pl.col("a")), True), + (_LazyCell(pl.col("a")), _LazyCell(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_return_whether_two_cells_are_equal(cell1: Cell, cell2: Cell, expected: bool) -> None: + assert (cell1._equals(cell2)) == expected + + +def test_should_return_true_if_objects_are_identical() -> None: + cell: Cell[Any] = _LazyCell(pl.col("a")) + assert (cell._equals(cell)) is True + 
+ +@pytest.mark.parametrize( + ("cell", "other"), + [ + (_LazyCell(pl.col("a")), None), + (_LazyCell(pl.col("a")), Table()), + ], + ids=[ + "Cell vs. None", + "Cell vs. Table", + ], +) +def test_should_return_not_implemented_if_other_is_not_cell(cell: Cell, other: Any) -> None: + assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_cell/test_floor.py b/tests/safeds/data/tabular/containers/_cell/test_floor.py new file mode 100644 index 000000000..72590efa8 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_floor.py @@ -0,0 +1,32 @@ +import math + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 10), + (-10, -10), + (-10.5, -11), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + ], +) +class TestShouldReturnFloorOfCell: + def test_dunder_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: math.floor(cell), expected) + + def test_named_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: cell.floor(), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_floordiv.py b/tests/safeds/data/tabular/containers/_cell/test_floordiv.py new file mode 100644 index 000000000..495b129cc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_floordiv.py @@ -0,0 +1,26 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 2, 1), + (3, 1.6, 1), + (1.5, 3, 0), + (1.5, 1.4, 1), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeDivision: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + 
assert_cell_operation_works(value1, lambda cell: cell // value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 // cell, expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_ge.py b/tests/safeds/data/tabular/containers/_cell/test_ge.py new file mode 100644 index 000000000..f285f4ae2 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_ge.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, True), + (3, 1.5, True), + (1.5, 3, False), + (1.5, 1.5, True), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeGreaterThanOrEqual: + def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell >= value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 >= cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.ge(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_gt.py b/tests/safeds/data/tabular/containers/_cell/test_gt.py new file mode 100644 index 000000000..a28704793 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_gt.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, False), + (3, 1.5, True), + (1.5, 3, False), + (1.5, 1.5, False), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeGreaterThan: + def 
test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell > value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 > cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.gt(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_hash.py b/tests/safeds/data/tabular/containers/_cell/test_hash.py new file mode 100644 index 000000000..cdea9c706 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_hash.py @@ -0,0 +1,26 @@ +from typing import Any + +import polars as pl +import pytest +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell + + +def test_should_be_deterministic() -> None: + cell: Cell[Any] = _LazyCell(pl.col("a")) + assert hash(cell) == 7139977585477665635 + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyCell(pl.col("a")), _LazyCell(pl.col("a")), True), + (_LazyCell(pl.col("a")), _LazyCell(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_be_good_hash(cell1: Cell, cell2: Cell, expected: bool) -> None: + assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/containers/_cell/test_le.py b/tests/safeds/data/tabular/containers/_cell/test_le.py new file mode 100644 index 000000000..d2ea39816 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_le.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, True), + (3, 1.5, False), + (1.5, 3, True), + (1.5, 1.5, True), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - 
float", + ], +) +class TestShouldComputeLessThanOrEqual: + def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell <= value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 <= cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.le(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_lt.py b/tests/safeds/data/tabular/containers/_cell/test_lt.py new file mode 100644 index 000000000..2dc961031 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_lt.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, False), + (3, 1.5, False), + (1.5, 3, True), + (1.5, 1.5, False), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeLessThan: + def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell < value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 < cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.lt(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_mod.py b/tests/safeds/data/tabular/containers/_cell/test_mod.py new file mode 100644 index 000000000..e71bc4642 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_mod.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import 
assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, 0), + (3, 1.5, 0.0), + (1.5, 3, 1.5), + (1.5, 1.5, 0.0), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeModulus: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell % value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 % cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.mod(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_mul.py b/tests/safeds/data/tabular/containers/_cell/test_mul.py new file mode 100644 index 000000000..a9da48885 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_mul.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, 9), + (3, 1.5, 4.5), + (1.5, 3, 4.5), + (1.5, 1.5, 2.25), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeMultiplication: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell * value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 * cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.mul(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_ne.py 
b/tests/safeds/data/tabular/containers/_cell/test_ne.py new file mode 100644 index 000000000..e826c2b6e --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_ne.py @@ -0,0 +1,26 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, False), + (3, 1.5, True), + (1.5, 3, True), + (1.5, 1.5, False), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeNegatedEquality: + def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell != value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: value2 != cell, expected) # type: ignore[arg-type,return-value] diff --git a/tests/safeds/data/tabular/containers/_cell/test_neg.py b/tests/safeds/data/tabular/containers/_cell/test_neg.py new file mode 100644 index 000000000..306fdd530 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_neg.py @@ -0,0 +1,30 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0.0), + (10, -10), + (10.5, -10.5), + (-10, 10), + (-10.5, 10.5), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + ], +) +class TestShouldNegateValueOfCell: + def test_dunder_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: -cell, expected) + + def test_named_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: cell.neg(), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_not.py b/tests/safeds/data/tabular/containers/_cell/test_not.py new file mode 100644 
index 000000000..6381200b7 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_not.py @@ -0,0 +1,28 @@ +from typing import Any + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (False, True), + (True, False), + (0, True), + (1, False), + ], + ids=[ + "false", + "true", + "falsy int", + "truthy int", + ], +) +class TestShouldInvertValueOfCell: + def test_dunder_method(self, value: Any, expected: bool) -> None: + assert_cell_operation_works(value, lambda cell: ~cell, expected) + + def test_named_method(self, value: Any, expected: bool) -> None: + assert_cell_operation_works(value, lambda cell: cell.not_(), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_or.py b/tests/safeds/data/tabular/containers/_cell/test_or.py new file mode 100644 index 000000000..edf5bc89c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_or.py @@ -0,0 +1,39 @@ +from typing import Any + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (False, False, False), + (False, True, True), + (True, False, True), + (True, True, True), + (0, False, False), + (0, True, True), + (1, False, True), + (1, True, True), + ], + ids=[ + "false - false", + "false - true", + "true - false", + "true - true", + "falsy int - false", + "falsy int - true", + "truthy int - false", + "truthy int - true", + ], +) +class TestShouldComputeDisjunction: + def test_dunder_method(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell | value2, expected) + + def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 | cell, expected) + + def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: + 
assert_cell_operation_works(value1, lambda cell: cell.or_(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_pos.py b/tests/safeds/data/tabular/containers/_cell/test_pos.py new file mode 100644 index 000000000..da37cc41f --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_pos.py @@ -0,0 +1,27 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0.0), + (10, 10), + (10.5, 10.5), + (-10, -10), + (-10.5, -10.5), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + ], +) +class TestShouldReturnValueOfCell: + def test_dunder_method(self, value: float, expected: float) -> None: + assert_cell_operation_works(value, lambda cell: +cell, expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_pow.py b/tests/safeds/data/tabular/containers/_cell/test_pow.py new file mode 100644 index 000000000..40a2d5216 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_pow.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 2, 9), + (4, 0.5, 2.0), + (1.5, 2, 2.25), + (2.25, 0.5, 1.5), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputePower: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell**value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1**cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.pow(value2), expected) diff --git 
a/tests/safeds/data/tabular/containers/_cell/test_sizeof.py b/tests/safeds/data/tabular/containers/_cell/test_sizeof.py new file mode 100644 index 000000000..1043ed5df --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_sizeof.py @@ -0,0 +1,13 @@ +import sys +from typing import TYPE_CHECKING, Any + +import polars as pl +from safeds.data.tabular.containers._lazy_cell import _LazyCell + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Cell + + +def test_should_return_size_greater_than_normal_object() -> None: + cell: Cell[Any] = _LazyCell(pl.col("a")) + assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_cell/test_sub.py b/tests/safeds/data/tabular/containers/_cell/test_sub.py new file mode 100644 index 000000000..f5dc9b885 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_cell/test_sub.py @@ -0,0 +1,29 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, 0), + (3, 1.5, 1.5), + (1.5, 3, -1.5), + (1.5, 1.5, 0.0), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + ], +) +class TestShouldComputeSubtraction: + def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell - value2, expected) + + def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value2, lambda cell: value1 - cell, expected) + + def test_named_method(self, value1: float, value2: float, expected: float) -> None: + assert_cell_operation_works(value1, lambda cell: cell.sub(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_cell/test_xor.py b/tests/safeds/data/tabular/containers/_cell/test_xor.py new file mode 100644 index 000000000..256846491 --- /dev/null +++ 
b/tests/safeds/data/tabular/containers/_cell/test_xor.py @@ -0,0 +1,39 @@ +from typing import Any + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (False, False, False), + (False, True, True), + (True, False, True), + (True, True, False), + (0, False, False), + (0, True, True), + (1, False, True), + (1, True, False), + ], + ids=[ + "false - false", + "false - true", + "true - false", + "true - true", + "falsy int - false", + "falsy int - true", + "truthy int - false", + "truthy int - true", + ], +) +class TestShouldComputeExclusiveOr: + def test_dunder_method(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell ^ value2, expected) + + def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value2, lambda cell: value1 ^ cell, expected) + + def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: + assert_cell_operation_works(value1, lambda cell: cell.xor(value2), expected) diff --git a/tests/safeds/data/tabular/containers/_column/test_hash.py b/tests/safeds/data/tabular/containers/_column/test_hash.py index e388de15e..0f6900bcd 100644 --- a/tests/safeds/data/tabular/containers/_column/test_hash.py +++ b/tests/safeds/data/tabular/containers/_column/test_hash.py @@ -18,33 +18,22 @@ def test_should_be_deterministic(column: Column, expected: int) -> None: @pytest.mark.parametrize( - ("column1", "column2"), + ("column1", "column2", "expected"), [ - (Column("a"), Column("a")), - (Column("a", [1, 2, 3]), Column("a", [1, 2, 3])), - ], - ids=[ - "empty", - "non-empty", - ], -) -def test_should_return_same_hash_for_equal_columns(column1: Column, column2: Column) -> None: - assert hash(column1) == hash(column2) - - -@pytest.mark.parametrize( - ("column1", "column2"), - [ - (Column("a"), Column("b")), - (Column("a", [1, 2, 3]), 
Column("a", [1, 2])), - (Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"])), + (Column("a"), Column("a"), True), + (Column("a", [1, 2, 3]), Column("a", [1, 2, 3]), True), + (Column("a"), Column("b"), False), + (Column("a", [1, 2, 3]), Column("a", [1, 2]), False), + (Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"]), False), # We don't use the column values in the hash calculation ], ids=[ + "equal empty", + "equal non-empty", "different names", "different lengths", "different types", ], ) -def test_should_ideally_return_different_hash_for_unequal_columns(column1: Column, column2: Column) -> None: - assert hash(column1) != hash(column2) +def test_should_be_good_hash(column1: Column, column2: Column, expected: bool) -> None: + assert (hash(column1) == hash(column2)) == expected diff --git a/tests/safeds/data/tabular/containers/_string_cell/__init__.py b/tests/safeds/data/tabular/containers/_string_cell/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_contains.py b/tests/safeds/data/tabular/containers/_string_cell/test_contains.py new file mode 100644 index 000000000..bac8e354a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_contains.py @@ -0,0 +1,22 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "substring", "expected"), + [ + ("", "a", False), + ("abc", "", True), + ("abc", "a", True), + ("abc", "d", False), + ], + ids=[ + "empty string", + "empty substring", + "contained", + "not contained", + ], +) +def test_should_check_whether_string_contains_substring(string: str, substring: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.contains(substring), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_ends_with.py b/tests/safeds/data/tabular/containers/_string_cell/test_ends_with.py new file mode 100644 index 
000000000..78102c900 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_ends_with.py @@ -0,0 +1,22 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "suffix", "expected"), + [ + ("", "a", False), + ("abc", "", True), + ("abc", "c", True), + ("abc", "a", False), + ], + ids=[ + "empty string", + "empty suffix", + "ends with", + "does not end with", + ], +) +def test_should_check_whether_string_ends_with_prefix(string: str, suffix: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.ends_with(suffix), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_equals.py b/tests/safeds/data/tabular/containers/_string_cell/test_equals.py new file mode 100644 index 000000000..2f0ad4ad1 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_equals.py @@ -0,0 +1,41 @@ +from typing import Any + +import polars as pl +import pytest +from safeds.data.tabular.containers import StringCell, Table +from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), + (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_return_whether_two_cells_are_equal(cell1: StringCell, cell2: StringCell, expected: bool) -> None: + assert (cell1._equals(cell2)) == expected + + +def test_should_return_true_if_objects_are_identical() -> None: + cell = _LazyStringCell(pl.col("a")) + assert (cell._equals(cell)) is True + + +@pytest.mark.parametrize( + ("cell", "other"), + [ + (_LazyStringCell(pl.col("a")), None), + (_LazyStringCell(pl.col("a")), Table()), + ], + ids=[ + "Cell vs. None", + "Cell vs. 
Table", + ], +) +def test_should_return_not_implemented_if_other_is_not_cell(cell: StringCell, other: Any) -> None: + assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_hash.py b/tests/safeds/data/tabular/containers/_string_cell/test_hash.py new file mode 100644 index 000000000..db9837178 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_hash.py @@ -0,0 +1,24 @@ +import polars as pl +import pytest +from safeds.data.tabular.containers import StringCell +from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell + + +def test_should_be_deterministic() -> None: + cell = _LazyStringCell(pl.col("a")) + assert hash(cell) == 7139977585477665635 + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), + (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_be_good_hash(cell1: StringCell, cell2: StringCell, expected: bool) -> None: + assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_index_of.py b/tests/safeds/data/tabular/containers/_string_cell/test_index_of.py new file mode 100644 index 000000000..84e79ad1b --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_index_of.py @@ -0,0 +1,22 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "substring", "expected"), + [ + ("", "a", None), + ("abc", "", 0), + ("abc", "b", 1), + ("abc", "d", None), + ], + ids=[ + "empty string", + "empty substring", + "contained", + "not contained", + ], +) +def test_should_return_index_of_first_occurrence_of_substring(string: str, substring: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.index_of(substring), expected) diff --git 
a/tests/safeds/data/tabular/containers/_string_cell/test_length.py b/tests/safeds/data/tabular/containers/_string_cell/test_length.py new file mode 100644 index 000000000..5b7f0370b --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_length.py @@ -0,0 +1,26 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "optimize_for_ascii", "expected"), + [ + ("", False, 0), + ("", True, 0), + ("abc", False, 3), + ("abc", True, 3), + ], + ids=[ + "empty (unoptimized)", + "empty (optimized)", + "non-empty (unoptimized)", + "non-empty (optimized)", + ], +) +def test_should_return_number_of_characters(string: str, optimize_for_ascii: bool, expected: bool) -> None: + assert_cell_operation_works( + string, + lambda cell: cell.str.length(optimize_for_ascii=optimize_for_ascii), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_replace.py b/tests/safeds/data/tabular/containers/_string_cell/test_replace.py new file mode 100644 index 000000000..f1f32c07a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_replace.py @@ -0,0 +1,24 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "old", "new", "expected"), + [ + ("", "a", "b", ""), + ("abc", "", "d", "dadbdcd"), + ("abc", "a", "", "bc"), + ("abc", "d", "e", "abc"), + ("aba", "a", "d", "dbd"), + ], + ids=[ + "empty string", + "empty old", + "empty new", + "no occurrences", + "replace all occurrences", + ], +) +def test_should_replace_all_occurrences_of_old_with_new(string: str, old: str, new: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.replace(old, new), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_sizeof.py b/tests/safeds/data/tabular/containers/_string_cell/test_sizeof.py new file mode 100644 index 000000000..43df6affb --- /dev/null +++ 
b/tests/safeds/data/tabular/containers/_string_cell/test_sizeof.py @@ -0,0 +1,9 @@ +import sys + +import polars as pl +from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell + + +def test_should_return_size_greater_than_normal_object() -> None: + cell = _LazyStringCell(pl.col("a")) + assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_starts_with.py b/tests/safeds/data/tabular/containers/_string_cell/test_starts_with.py new file mode 100644 index 000000000..7d402cd0b --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_starts_with.py @@ -0,0 +1,22 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "prefix", "expected"), + [ + ("", "a", False), + ("abc", "", True), + ("abc", "a", True), + ("abc", "c", False), + ], + ids=[ + "empty string", + "empty prefix", + "starts with", + "does not start with", + ], +) +def test_should_check_whether_string_start_with_prefix(string: str, prefix: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.starts_with(prefix), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_substring.py b/tests/safeds/data/tabular/containers/_string_cell/test_substring.py new file mode 100644 index 000000000..1305e76b3 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_substring.py @@ -0,0 +1,36 @@ +import pytest +from safeds.exceptions import OutOfBoundsError + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "start", "length", "expected"), + [ + ("", 0, None, ""), + ("abc", 0, None, "abc"), + ("abc", 1, None, "bc"), + ("abc", 10, None, ""), + ("abc", -1, None, "c"), + ("abc", -10, None, "abc"), + ("abc", 0, 1, "a"), + ("abc", 0, 10, "abc"), + ], + ids=[ + "empty", + "full string", + "positive start in bounds", + "positive start out of 
bounds", + "negative start in bounds", + "negative start out of bounds", + "positive length in bounds", + "positive length out of bounds", + ], +) +def test_should_return_substring(string: str, start: int, length: int | None, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.substring(start, length), expected) + + +def test_should_raise_if_length_is_negative() -> None: + with pytest.raises(OutOfBoundsError): + assert_cell_operation_works("abc", lambda cell: cell.str.substring(length=-1), None) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_date.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_date.py new file mode 100644 index 000000000..677438e0a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_date.py @@ -0,0 +1,22 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", None), + ("2022-01-09", datetime.date(2022, 1, 9)), + ("abc", None), + ], + ids=[ + "empty", + "ISO date", + "invalid string", + ], +) +def test_should_parse_date(string: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.to_date(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_datetime.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_datetime.py new file mode 100644 index 000000000..4c96d03d0 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_datetime.py @@ -0,0 +1,22 @@ +import datetime + +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", None), + ("2022-01-09T23:29:01Z", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), + ("abc", None), + ], + ids=[ + "empty", + "ISO datetime", + "invalid string", + ], +) +def test_should_parse_datetimes(string: str, expected: bool) -> None: + 
assert_cell_operation_works(string, lambda cell: cell.str.to_datetime(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_float.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_float.py new file mode 100644 index 000000000..f9ea7ef9c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_float.py @@ -0,0 +1,24 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", None), + ("11", 11), + ("11.5", 11.5), + ("10e-1", 1.0), + ("abc", None), + ], + ids=[ + "empty", + "integer", + "float", + "scientific notation", + "invalid string", + ], +) +def test_should_parse_float(string: str, expected: bool) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.to_float(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_int.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_int.py new file mode 100644 index 000000000..b4b3256cc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_int.py @@ -0,0 +1,26 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "base", "expected"), + [ + ("", 10, None), + ("11", 10, 11), + ("11", 2, 3), + ("abc", 10, None), + ], + ids=[ + "empty", + "11 base 10", + "11 base 2", + "invalid string", + ], +) +def test_should_parse_integer(string: str, base: int, expected: bool) -> None: + assert_cell_operation_works( + string, + lambda cell: cell.str.to_int(base=base), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_lowercase.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_lowercase.py new file mode 100644 index 000000000..f4c880761 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_lowercase.py @@ -0,0 +1,18 @@ +import pytest + +from tests.helpers import 
assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("AbC", "abc"), + ], + ids=[ + "empty", + "non-empty", + ], +) +def test_should_lowercase_a_string(string: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.to_lowercase(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_to_uppercase.py b/tests/safeds/data/tabular/containers/_string_cell/test_to_uppercase.py new file mode 100644 index 000000000..cfb14c7d2 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_to_uppercase.py @@ -0,0 +1,18 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("AbC", "ABC"), + ], + ids=[ + "empty", + "non-empty", + ], +) +def test_should_uppercase_a_string(string: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.to_uppercase(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_trim.py b/tests/safeds/data/tabular/containers/_string_cell/test_trim.py new file mode 100644 index 000000000..2b2101e4e --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_trim.py @@ -0,0 +1,24 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "abc"), + (" abc", "abc"), + ("abc ", "abc"), + (" abc ", "abc"), + ], + ids=[ + "empty", + "non-empty", + "whitespace start", + "whitespace end", + "whitespace start and end", + ], +) +def test_should_remove_whitespace_prefix_and_suffix(string: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.trim(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_trim_end.py b/tests/safeds/data/tabular/containers/_string_cell/test_trim_end.py new file mode 100644 index 
000000000..af0cd88dc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_trim_end.py @@ -0,0 +1,24 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "abc"), + (" abc", " abc"), + ("abc ", "abc"), + (" abc ", " abc"), + ], + ids=[ + "empty", + "non-empty", + "whitespace start", + "whitespace end", + "whitespace start and end", + ], +) +def test_should_remove_whitespace_suffix(string: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.trim_end(), expected) diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_trim_start.py b/tests/safeds/data/tabular/containers/_string_cell/test_trim_start.py new file mode 100644 index 000000000..6b487f6e7 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_string_cell/test_trim_start.py @@ -0,0 +1,24 @@ +import pytest + +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("string", "expected"), + [ + ("", ""), + ("abc", "abc"), + (" abc", "abc"), + ("abc ", "abc "), + (" abc ", "abc "), + ], + ids=[ + "empty", + "non-empty", + "whitespace start", + "whitespace end", + "whitespace start and end", + ], +) +def test_should_remove_whitespace_prefix(string: str, expected: str) -> None: + assert_cell_operation_works(string, lambda cell: cell.str.trim_start(), expected)