diff --git a/src/safeds/_typing/__init__.py b/src/safeds/_typing/__init__.py new file mode 100644 index 000000000..b418fe5f9 --- /dev/null +++ b/src/safeds/_typing/__init__.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import datetime +from decimal import Decimal +from typing import TypeAlias + +from safeds.data.tabular.containers import Cell + +_NumericLiteral: TypeAlias = int | float | Decimal +_TemporalLiteral: TypeAlias = datetime.date | datetime.time | datetime.datetime | datetime.timedelta +_PythonLiteral: TypeAlias = _NumericLiteral | bool | str | bytes | _TemporalLiteral +_ConvertibleToCell: TypeAlias = _PythonLiteral | Cell | None +_BooleanCell: TypeAlias = Cell[bool | None] +# We cannot restrict `Cell`, because `Row.get_cell` returns a `Cell[Any]`. +_ConvertibleToBooleanCell: TypeAlias = bool | Cell | None +_ConvertibleToIntCell: TypeAlias = int | Cell | None + + +__all__ = [ + "_BooleanCell", + "_ConvertibleToBooleanCell", + "_ConvertibleToCell", + "_ConvertibleToIntCell", + "_NumericLiteral", + "_PythonLiteral", + "_TemporalLiteral", +] diff --git a/src/safeds/_utils/__init__.py b/src/safeds/_utils/__init__.py index 83e31d841..fc3d501d4 100644 --- a/src/safeds/_utils/__init__.py +++ b/src/safeds/_utils/__init__.py @@ -7,6 +7,7 @@ if TYPE_CHECKING: from ._collections import _compute_duplicates from ._hashing import _structural_hash + from ._lazy import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema from ._plotting import _figure_to_image from ._random import _get_random_seed @@ -15,6 +16,8 @@ { "_compute_duplicates": "._collections:_compute_duplicates", "_structural_hash": "._hashing:_structural_hash", + "_safe_collect_lazy_frame": "._lazy:_safe_collect_lazy_frame", + "_safe_collect_lazy_frame_schema": "._lazy:_safe_collect_lazy_frame_schema", "_figure_to_image": "._plotting:_figure_to_image", "_get_random_seed": "._random:_get_random_seed", }, @@ -24,5 +27,7 @@ "_compute_duplicates", "_figure_to_image", "_get_random_seed", + 
"_safe_collect_lazy_frame", + "_safe_collect_lazy_frame_schema", "_structural_hash", ] diff --git a/src/safeds/_utils/_lazy.py b/src/safeds/_utils/_lazy.py new file mode 100644 index 000000000..92e443508 --- /dev/null +++ b/src/safeds/_utils/_lazy.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds.exceptions import LazyComputationError + +if TYPE_CHECKING: + import polars as pl + + +def _safe_collect_lazy_frame(frame: pl.LazyFrame) -> pl.DataFrame: + """ + Collect a LazyFrame into a DataFrame and raise a custom error if an error occurs. + + Parameters + ---------- + frame: + The LazyFrame to collect. + + Returns + ------- + frame: + The collected DataFrame. + + Raises + ------ + LazyComputationError + If an error occurs during the computation. + """ + from polars.exceptions import PolarsError + + try: + return frame.collect() + except PolarsError as e: + raise LazyComputationError(str(e)) from None + + +def _safe_collect_lazy_frame_schema(frame: pl.LazyFrame) -> pl.Schema: + """ + Collect the schema of a LazyFrame. + + Parameters + ---------- + frame: + The LazyFrame to collect the schema of. + + Returns + ------- + schema: + The collected schema. + + Raises + ------ + LazyComputationError + If an error occurs during the computation. 
+ """ + from polars.exceptions import PolarsError + + try: + return frame.collect_schema() + except PolarsError as e: + raise LazyComputationError(str(e)) from None diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 877159940..0cc022b45 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -1,24 +1,27 @@ from __future__ import annotations -import datetime from abc import ABC, abstractmethod -from decimal import Decimal -from typing import TYPE_CHECKING, Any, Generic, TypeAlias, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar if TYPE_CHECKING: + import datetime as python_datetime + import polars as pl + from safeds._typing import ( + _BooleanCell, + _ConvertibleToBooleanCell, + _ConvertibleToCell, + _ConvertibleToIntCell, + _PythonLiteral, + ) + from safeds.data.tabular.typing import ColumnType + from ._string_cell import StringCell from ._temporal_cell import TemporalCell T_co = TypeVar("T_co", covariant=True) -P_contra = TypeVar("P_contra", contravariant=True) -R_co = TypeVar("R_co", covariant=True) - - -_NumericLiteral: TypeAlias = int | float | Decimal -_TemporalLiteral: TypeAlias = datetime.date | datetime.time | datetime.datetime | datetime.timedelta -_PythonLiteral: TypeAlias = _NumericLiteral | bool | str | bytes | _TemporalLiteral | None +P = TypeVar("P") class Cell(ABC, Generic[T_co]): @@ -33,9 +36,9 @@ class Cell(ABC, Generic[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def from_literal(value: _PythonLiteral) -> Cell: + def constant(value: _PythonLiteral | None) -> Cell: """ - Create a new cell from a literal value. + Create a cell with a constant value. Parameters ---------- @@ -46,6 +49,21 @@ def from_literal(value: _PythonLiteral) -> Cell: ------- cell: The created cell. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda _: Cell.constant(3)) + +-----+ + | a | + | --- | + | i32 | + +=====+ + | 3 | + | 3 | + | 3 | + +-----+ """ import polars as pl @@ -54,26 +72,326 @@ def from_literal(value: _PythonLiteral) -> Cell: return _LazyCell(pl.lit(value)) @staticmethod - def first_not_none(cells: list[Cell]) -> Cell: + def date( + year: _ConvertibleToIntCell, + month: _ConvertibleToIntCell, + day: _ConvertibleToIntCell, + ) -> Cell[python_datetime.date | None]: """ - Return the first cell from the given list that is not None. + Create a cell with a date. + + Invalid dates are converted to missing values (`None`). + + Parameters + ---------- + year: + The year. + month: + The month. Must be between 1 and 12. + day: + The day. Must be between 1 and 31. + + Returns + ------- + cell: + The created cell. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda _: Cell.date(2025, 1, 15)) + +------------+ + | a | + | --- | + | date | + +============+ + | 2025-01-15 | + | 2025-01-15 | + | 2025-01-15 | + +------------+ + + >>> column.transform(lambda cell: Cell.date(2025, cell, 15)) + +------------+ + | a | + | --- | + | date | + +============+ + | 2025-01-15 | + | 2025-02-15 | + | null | + +------------+ + """ + import polars as pl + + from ._lazy_cell import _LazyCell # circular import + + return _LazyCell( + pl.date( + year=_unwrap(year), + month=_unwrap(month), + day=_unwrap(day), + ), + ) + + @staticmethod + def datetime( + year: _ConvertibleToIntCell, + month: _ConvertibleToIntCell, + day: _ConvertibleToIntCell, + *, + hour: _ConvertibleToIntCell = 0, + minute: _ConvertibleToIntCell = 0, + second: _ConvertibleToIntCell = 0, + microsecond: _ConvertibleToIntCell = 0, + ) -> Cell[python_datetime.datetime | None]: + """ + Create a cell with a datetime. 
+ + Invalid datetimes are converted to missing values (`None`). + + Parameters + ---------- + year: + The year. + month: + The month. Must be between 1 and 12. + day: + The day. Must be between 1 and 31. + hour: + The hour. Must be between 0 and 23. + minute: + The minute. Must be between 0 and 59. + second: + The second. Must be between 0 and 59. + microsecond: + The microsecond. Must be between 0 and 999,999. + + Returns + ------- + cell: + The created cell. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda _: Cell.datetime(2025, 1, 15, hour=12)) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | 2025-01-15 12:00:00 | + | 2025-01-15 12:00:00 | + | 2025-01-15 12:00:00 | + +---------------------+ + + >>> column.transform(lambda cell: Cell.datetime(2025, 1, 15, hour=cell)) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | 2025-01-15 01:00:00 | + | 2025-01-15 02:00:00 | + | null | + +---------------------+ + """ + import polars as pl + + from ._lazy_cell import _LazyCell # circular import + + pl_year = _unwrap(year) + pl_month = _unwrap(month) + pl_day = _unwrap(day) + pl_hour = _unwrap(hour) + pl_minute = _unwrap(minute) + pl_second = _unwrap(second) + pl_microsecond = _unwrap(microsecond) + + # By default, microseconds overflow into seconds + return _LazyCell( + pl.when(pl_microsecond <= 999_999) + .then(pl.datetime(pl_year, pl_month, pl_day, pl_hour, pl_minute, pl_second, pl_microsecond)) + .otherwise(None), + ) + + @staticmethod + def duration( + *, + weeks: _ConvertibleToIntCell = 0, + days: _ConvertibleToIntCell = 0, + hours: _ConvertibleToIntCell = 0, + minutes: _ConvertibleToIntCell = 0, + seconds: _ConvertibleToIntCell = 0, + milliseconds: _ConvertibleToIntCell = 0, + microseconds: _ConvertibleToIntCell = 0, + ) -> Cell[python_datetime.timedelta | None]: + """ + Create a cell 
with a duration. + + Invalid durations are converted to missing values (`None`). + + Parameters + ---------- + weeks: + The number of weeks. + days: + The number of days. + hours: + The number of hours. + minutes: + The number of minutes. + seconds: + The number of seconds. + milliseconds: + The number of milliseconds. + microseconds: + The number of microseconds. + + Returns + ------- + cell: + The created cell. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda _: Cell.duration(hours=1)) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1h | + | 1h | + | 1h | + +--------------+ + + >>> column.transform(lambda cell: Cell.duration(hours = cell)) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1h | + | 2h | + | null | + +--------------+ + """ + import polars as pl + + from ._lazy_cell import _LazyCell # circular import + + return _LazyCell( + pl.duration( + weeks=_unwrap(weeks), + days=_unwrap(days), + hours=_unwrap(hours), + minutes=_unwrap(minutes), + seconds=_unwrap(seconds), + milliseconds=_unwrap(milliseconds), + microseconds=_unwrap(microseconds), + ), + ) + + @staticmethod + def time( + hour: _ConvertibleToIntCell, + minute: _ConvertibleToIntCell, + second: _ConvertibleToIntCell, + *, + microsecond: _ConvertibleToIntCell = 0, + ) -> Cell[python_datetime.time | None]: + """ + Create a cell with a time. + + Invalid times are converted to missing values (`None`). + + Parameters + ---------- + hour: + The hour. Must be between 0 and 23. + minute: + The minute. Must be between 0 and 59. + second: + The second. Must be between 0 and 59. + microsecond: + The microsecond. Must be between 0 and 999,999. + + Returns + ------- + cell: + The created cell. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda _: Cell.time(12, 0, 0)) + +----------+ + | a | + | --- | + | time | + +==========+ + | 12:00:00 | + | 12:00:00 | + | 12:00:00 | + +----------+ + + >>> column.transform(lambda cell: Cell.time(12, cell, 0, microsecond=1)) + +-----------------+ + | a | + | --- | + | time | + +=================+ + | 12:01:00.000001 | + | 12:02:00.000001 | + | null | + +-----------------+ + """ + import polars as pl + + from ._lazy_cell import _LazyCell # circular import + + pl_hour = _unwrap(hour) + pl_minute = _unwrap(minute) + pl_second = _unwrap(second) + pl_microsecond = _unwrap(microsecond) + + # By default, microseconds overflow into seconds + return _LazyCell( + pl.when(pl_microsecond <= 999_999) + .then(pl.time(pl_hour, pl_minute, pl_second, pl_microsecond)) + .otherwise(None), + ) + + @staticmethod + def first_not_none(cells: list[Cell[P]]) -> Cell[P | None]: + """ + Return the first cell that is not None or None if all cells are None. Parameters ---------- cells: - The list of cells to be searched. + The list of cells to be checked. Returns ------- cell: - Returns the contents of the first cell that is not None. If all cells in the list are None or the list is - empty returns None. + The first cell that is not None or None if all cells are None. 
""" import polars as pl from ._lazy_cell import _LazyCell # circular import - return _LazyCell(pl.coalesce([cell._polars_expression for cell in cells])) + # `coalesce` raises in this case + if not cells: + return Cell.constant(None) + + return _LazyCell(pl.coalesce([_unwrap(cell) for cell in cells])) # ------------------------------------------------------------------------------------------------------------------ # Dunder methods @@ -82,115 +400,121 @@ def first_not_none(cells: list[Cell]) -> Cell: # "Boolean" operators (actually bitwise) ----------------------------------- @abstractmethod - def __invert__(self) -> Cell[bool]: ... + def __invert__(self) -> _BooleanCell: ... @abstractmethod - def __and__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... @abstractmethod - def __rand__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... @abstractmethod - def __or__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... @abstractmethod - def __ror__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __ror__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... @abstractmethod - def __xor__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... @abstractmethod - def __rxor__(self, other: bool | Cell[bool]) -> Cell[bool]: ... + def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... # Comparison --------------------------------------------------------------- @abstractmethod - def __eq__(self, other: object) -> Cell[bool]: # type: ignore[override] + def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] ... @abstractmethod - def __ge__(self, other: Any) -> Cell[bool]: ... 
+ def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: ... @abstractmethod - def __gt__(self, other: Any) -> Cell[bool]: ... + def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... @abstractmethod - def __le__(self, other: Any) -> Cell[bool]: ... + def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: ... @abstractmethod - def __lt__(self, other: Any) -> Cell[bool]: ... + def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... @abstractmethod - def __ne__(self, other: object) -> Cell[bool]: # type: ignore[override] + def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] ... # Numeric operators -------------------------------------------------------- @abstractmethod - def __abs__(self) -> Cell[R_co]: ... + def __abs__(self) -> Cell: ... @abstractmethod - def __ceil__(self) -> Cell[R_co]: ... + def __ceil__(self) -> Cell: ... @abstractmethod - def __floor__(self) -> Cell[R_co]: ... + def __floor__(self) -> Cell: ... @abstractmethod - def __neg__(self) -> Cell[R_co]: ... + def __neg__(self) -> Cell: ... @abstractmethod - def __pos__(self) -> Cell[R_co]: ... + def __pos__(self) -> Cell: ... @abstractmethod - def __add__(self, other: Any) -> Cell[R_co]: ... + def __add__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __radd__(self, other: Any) -> Cell[R_co]: ... + def __radd__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __floordiv__(self, other: Any) -> Cell[R_co]: ... + def __floordiv__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rfloordiv__(self, other: Any) -> Cell[R_co]: ... + def __rfloordiv__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __mod__(self, other: Any) -> Cell[R_co]: ... + def __mod__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rmod__(self, other: Any) -> Cell[R_co]: ... + def __rmod__(self, other: _ConvertibleToCell) -> Cell: ... 
@abstractmethod - def __mul__(self, other: Any) -> Cell[R_co]: ... + def __mul__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rmul__(self, other: Any) -> Cell[R_co]: ... + def __rmul__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __pow__(self, other: float | Cell[P_contra]) -> Cell[R_co]: ... + def __pow__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rpow__(self, other: float | Cell[P_contra]) -> Cell[R_co]: ... + def __rpow__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __sub__(self, other: Any) -> Cell[R_co]: ... + def __sub__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rsub__(self, other: Any) -> Cell[R_co]: ... + def __rsub__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __truediv__(self, other: Any) -> Cell[R_co]: ... + def __truediv__(self, other: _ConvertibleToCell) -> Cell: ... @abstractmethod - def __rtruediv__(self, other: Any) -> Cell[R_co]: ... + def __rtruediv__(self, other: _ConvertibleToCell) -> Cell: ... # Other -------------------------------------------------------------------- @abstractmethod def __hash__(self) -> int: ... + @abstractmethod + def __repr__(self) -> str: ... + @abstractmethod def __sizeof__(self) -> int: ... + @abstractmethod + def __str__(self) -> str: ... + # ------------------------------------------------------------------------------------------------------------------ # Properties # ------------------------------------------------------------------------------------------------------------------ @@ -198,134 +522,183 @@ def __sizeof__(self) -> int: ... @property @abstractmethod def str(self) -> StringCell: - """Namespace for operations on strings.""" + """ + Namespace for operations on strings. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["hi", "hello"]) + >>> column.transform(lambda cell: cell.str.length()) + +-----+ + | a | + | --- | + | u32 | + +=====+ + | 2 | + | 5 | + +-----+ + """ @property @abstractmethod def dt(self) -> TemporalCell: - """Namespace for operations on date time values.""" + """ + Namespace for operations on temporal values. + + Examples + -------- + >>> import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime.datetime(2025, 1, 1), datetime.datetime(2024, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2025 | + | 2024 | + +------+ + """ # ------------------------------------------------------------------------------------------------------------------ # Boolean operations # ------------------------------------------------------------------------------------------------------------------ - def not_(self) -> Cell[bool]: + def not_(self) -> _BooleanCell: """ Negate a boolean. This is equivalent to the `~` operator. + Do **not** use the `not` operator. Its behavior cannot be overwritten in Python, so it will not work as + expected. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [True, False]) + >>> column = Column("a", [True, False, None]) >>> column.transform(lambda cell: cell.not_()) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ >>> column.transform(lambda cell: ~cell) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ """ return self.__invert__() - def and_(self, other: bool | Cell[bool]) -> Cell[bool]: + def and_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ Perform a boolean AND operation. This is equivalent to the `&` operator. + Do **not** use the `and` operator. Its behavior cannot be overwritten in Python, so it will not work as + expected. + Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [True, False]) - >>> column.transform(lambda cell: cell.and_(False)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | false | - +---------+ - - >>> column.transform(lambda cell: cell & False) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | false | - +---------+ + >>> column = Column("a", [True, False, None]) + >>> column.transform(lambda cell: cell.and_(True)) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ + + >>> column.transform(lambda cell: cell & True) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ """ return self.__and__(other) - def or_(self, other: bool | Cell[bool]) -> Cell[bool]: + def or_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ Perform a boolean OR operation. 
This is equivalent to the `|` operator. + Do **not** use the `or` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [True, False]) - >>> column.transform(lambda cell: cell.or_(True)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | true | - +---------+ - - >>> column.transform(lambda cell: cell | True) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | true | - +---------+ + >>> column = Column("a", [True, False, None]) + >>> column.transform(lambda cell: cell.or_(False)) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ + + >>> column.transform(lambda cell: cell | False) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ """ return self.__or__(other) - def xor(self, other: bool | Cell[bool]) -> Cell[bool]: + def xor(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ Perform a boolean XOR operation. This is equivalent to the `^` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [True, False]) + >>> column = Column("a", [True, False, None]) >>> column.transform(lambda cell: cell.xor(True)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ >>> column.transform(lambda cell: cell ^ True) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ """ return self.__xor__(other) @@ -333,263 +706,293 @@ def xor(self, other: bool | Cell[bool]) -> Cell[bool]: # Numeric operations # ------------------------------------------------------------------------------------------------------------------ - def abs(self) -> Cell[R_co]: + def abs(self) -> Cell: """ Get the absolute value. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, -2]) + >>> column = Column("a", [1, -2, None]) >>> column.transform(lambda cell: cell.abs()) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 1 | - | 2 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | null | + +------+ """ return self.__abs__() - def ceil(self) -> Cell[R_co]: + def ceil(self) -> Cell: """ Round up to the nearest integer. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1.1, 2.9]) + >>> column = Column("a", [1.1, 3.0, None]) >>> column.transform(lambda cell: cell.ceil()) +---------+ - | example | + | a | | --- | | f64 | +=========+ | 2.00000 | | 3.00000 | + | null | +---------+ """ return self.__ceil__() - def floor(self) -> Cell[R_co]: + def floor(self) -> Cell: """ Round down to the nearest integer. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1.1, 2.9]) + >>> column = Column("a", [1.1, 3.0, None]) >>> column.transform(lambda cell: cell.floor()) +---------+ - | example | + | a | | --- | | f64 | +=========+ | 1.00000 | - | 2.00000 | + | 3.00000 | + | null | +---------+ """ return self.__floor__() - def neg(self) -> Cell[R_co]: + def neg(self) -> Cell: """ - Negate the value. + Negate the value. This is equivalent to the unary `-` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, -2]) + >>> column = Column("a", [1, -2, None]) >>> column.transform(lambda cell: cell.neg()) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | -1 | - | 2 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | -1 | + | 2 | + | null | + +------+ + + >>> column.transform(lambda cell: -cell) + +------+ + | a | + | --- | + | i64 | + +======+ + | -1 | + | 2 | + | null | + +------+ """ return self.__neg__() - def add(self, other: Any) -> Cell[R_co]: + def add(self, other: _ConvertibleToCell) -> Cell: """ Add a value. This is equivalent to the `+` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.add(3)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 4 | - | 5 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 4 | + | 5 | + | null | + +------+ >>> column.transform(lambda cell: cell + 3) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 4 | - | 5 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 4 | + | 5 | + | null | + +------+ """ return self.__add__(other) - def div(self, other: Any) -> Cell[R_co]: + def div(self, other: _ConvertibleToCell) -> Cell: """ Divide by a value. This is equivalent to the `/` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [6, 8]) + >>> column = Column("a", [6, 8, None]) >>> column.transform(lambda cell: cell.div(2)) +---------+ - | example | + | a | | --- | | f64 | +=========+ | 3.00000 | | 4.00000 | + | null | +---------+ >>> column.transform(lambda cell: cell / 2) +---------+ - | example | + | a | | --- | | f64 | +=========+ | 3.00000 | | 4.00000 | + | null | +---------+ """ return self.__truediv__(other) - def mod(self, other: Any) -> Cell[R_co]: + def mod(self, other: _ConvertibleToCell) -> Cell: """ Perform a modulo operation. This is equivalent to the `%` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [5, 6]) + >>> column = Column("a", [5, 6, -1, None]) >>> column.transform(lambda cell: cell.mod(3)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 2 | - | 0 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 2 | + | 0 | + | 2 | + | null | + +------+ >>> column.transform(lambda cell: cell % 3) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 2 | - | 0 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 2 | + | 0 | + | 2 | + | null | + +------+ """ return self.__mod__(other) - def mul(self, other: Any) -> Cell[R_co]: + def mul(self, other: _ConvertibleToCell) -> Cell: """ Multiply by a value. This is equivalent to the `*` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [2, 3]) + >>> column = Column("a", [2, 3, None]) >>> column.transform(lambda cell: cell.mul(4)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 8 | - | 12 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 8 | + | 12 | + | null | + +------+ >>> column.transform(lambda cell: cell * 4) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 8 | - | 12 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 8 | + | 12 | + | null | + +------+ """ return self.__mul__(other) - def pow(self, other: float | Cell[P_contra]) -> Cell[R_co]: + def pow(self, other: _ConvertibleToCell) -> Cell: """ Raise to a power. This is equivalent to the `**` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [2, 3]) + >>> column = Column("a", [2, 3, None]) >>> column.transform(lambda cell: cell.pow(3)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 8 | - | 27 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 8 | + | 27 | + | null | + +------+ + >>> column.transform(lambda cell: cell ** 3) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 8 | - | 27 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 8 | + | 27 | + | null | + +------+ """ return self.__pow__(other) - def sub(self, other: Any) -> Cell[R_co]: + def sub(self, other: _ConvertibleToCell) -> Cell: """ - Subtract a value. This is equivalent to the `-` operator. + Subtract a value. This is equivalent to the binary `-` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [5, 6]) + >>> column = Column("a", [5, 6, None]) >>> column.transform(lambda cell: cell.sub(3)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 2 | - | 3 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 2 | + | 3 | + | null | + +------+ >>> column.transform(lambda cell: cell - 3) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 2 | - | 3 | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 2 | + | 3 | + | null | + +------+ """ return self.__sub__(other) @@ -597,186 +1000,289 @@ def sub(self, other: Any) -> Cell[R_co]: # Comparison operations # ------------------------------------------------------------------------------------------------------------------ - def eq(self, other: Any) -> Cell[bool]: + @abstractmethod + def eq( + self, + other: _ConvertibleToCell, + *, + propagate_missing_values: bool = True, + ) -> _BooleanCell: """ - Check if equal to a value. This is equivalent to the `==` operator. + Check if equal to a value. The default behavior is equivalent to the `==` operator. + + Missing values (indicated by `None`) are handled as follows: + + - If `propagate_missing_values` is `True` (default), the result will be a missing value if either the cell or + the other value is a missing value. Here, `None == None` is `None`. The intuition is that we do not know the + result of the comparison if we do not know the values, which is consistent with the other cell operations. + - If `propagate_missing_values` is `False`, `None` will be treated as a regular value. Here, `None == None` + is `True`. This behavior is useful, if you want to work with missing values, e.g. to filter them out. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.eq(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ >>> column.transform(lambda cell: cell == 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ + + >>> column.transform(lambda cell: cell.eq(2, propagate_missing_values=False)) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | false | + +-------+ """ - return self.__eq__(other) - def neq(self, other: Any) -> Cell[bool]: + @abstractmethod + def neq( + self, + other: _ConvertibleToCell, + *, + propagate_missing_values: bool = True, + ) -> _BooleanCell: """ - Check if not equal to a value. This is equivalent to the `!=` operator. + Check if not equal to a value. The default behavior is equivalent to the `!=` operator. + + Missing values (indicated by `None`) are handled as follows: + + - If `propagate_missing_values` is `True` (default), the result will be a missing value if either the cell or + the other value is a missing value. Here, `None != None` is `None`. The intuition is that we do not know the + result of the comparison if we do not know the values, which is consistent with the other cell operations. + - If `propagate_missing_values` is `False`, `None` will be treated as a regular value. Here, `None != None` + is `False`. This behavior is useful, if you want to work with missing values, e.g. to filter them out. + + Parameters + ---------- + other: + The value to compare to. + propagate_missing_values: + Whether to propagate missing values. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.neq(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ >>> column.transform(lambda cell: cell != 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ + + >>> column.transform(lambda cell: cell.neq(2, propagate_missing_values=False)) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | true | + +-------+ """ - return self.__ne__(other) - def ge(self, other: Any) -> Cell[bool]: + def ge(self, other: _ConvertibleToCell) -> _BooleanCell: """ Check if greater than or equal to a value. This is equivalent to the `>=` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.ge(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ >>> column.transform(lambda cell: cell >= 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | true | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ """ return self.__ge__(other) - def gt(self, other: Any) -> Cell[bool]: + def gt(self, other: _ConvertibleToCell) -> _BooleanCell: """ Check if greater than a value. This is equivalent to the `>` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.gt(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | false | + | null | + +-------+ >>> column.transform(lambda cell: cell > 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | false | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | false | + | null | + +-------+ """ return self.__gt__(other) - def le(self, other: Any) -> Cell[bool]: + def le(self, other: _ConvertibleToCell) -> _BooleanCell: """ Check if less than or equal to a value. This is equivalent to the `<=` operator. Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.le(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | true | - +---------+ + +------+ + | a | + | --- | + | bool | + +======+ + | true | + | true | + | null | + +------+ >>> column.transform(lambda cell: cell <= 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | true | - +---------+ + +------+ + | a | + | --- | + | bool | + +======+ + | true | + | true | + | null | + +------+ """ return self.__le__(other) - def lt(self, other: Any) -> Cell[bool]: + def lt(self, other: _ConvertibleToCell) -> _BooleanCell: """ Check if less than a value. This is equivalent to the `<` operator. 
Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", [1, 2]) + >>> column = Column("a", [1, 2, None]) >>> column.transform(lambda cell: cell.lt(2)) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ >>> column.transform(lambda cell: cell < 2) - +---------+ - | example | - | --- | - | bool | - +=========+ - | true | - | false | - +---------+ + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | null | + +-------+ """ return self.__lt__(other) + # ------------------------------------------------------------------------------------------------------------------ + # Other + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def cast(self, type_: ColumnType) -> Cell: + """ + Cast the cell to a different type. + + Parameters + ---------- + type_: + The type to cast to. + + Returns + ------- + cell: + The cast cell. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> from safeds.data.tabular.typing import ColumnType + >>> column = Column("a", [1, 2, None]) + >>> column.transform(lambda cell: cell.cast(ColumnType.string())) + +------+ + | a | + | --- | + | str | + +======+ + | 1 | + | 2 | + | null | + +------+ + """ + # ------------------------------------------------------------------------------------------------------------------ # Internal # ------------------------------------------------------------------------------------------------------------------ @@ -793,3 +1299,12 @@ def _equals(self, other: object) -> bool: This method is needed because the `__eq__` method is used for element-wise comparisons. 
""" + + +def _unwrap(cell_proxy: _ConvertibleToCell) -> pl.Expr: + import polars as pl + + if isinstance(cell_proxy, Cell): + return cell_proxy._polars_expression + else: + return pl.lit(cell_proxy) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index f26e7763e..6e4f86fc1 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -18,10 +18,13 @@ if TYPE_CHECKING: from polars import Series + from safeds._typing import _BooleanCell from safeds.data.tabular.typing import ColumnType - from safeds.exceptions import ( - ColumnTypeError, # noqa: F401 - IndexOutOfBoundsError, # noqa: F401 + from safeds.exceptions import ( # noqa: F401 + ColumnTypeError, + IndexOutOfBoundsError, + LengthMismatchError, + MissingValuesError, ) from ._cell import Cell @@ -32,9 +35,6 @@ R_co = TypeVar("R_co", covariant=True) -# TODO: Rethink whether T_co should include None, also affects Cell operations ('<' return Cell[bool | None] etc.) - - class Column(Sequence[T_co]): """ A named, one-dimensional collection of homogeneous values. @@ -325,7 +325,7 @@ def get_value(self, index: int) -> T_co: @overload def all( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -333,14 +333,14 @@ def all( @overload def all( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool, ) -> bool | None: ... def all( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool = True, ) -> bool | None: @@ -401,7 +401,7 @@ def all( @overload def any( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... 
@@ -409,14 +409,14 @@ def any( @overload def any( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool, ) -> bool | None: ... def any( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool = True, ) -> bool | None: @@ -477,7 +477,7 @@ def any( @overload def count_if( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -485,14 +485,14 @@ def count_if( @overload def count_if( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool, ) -> int | None: ... def count_if( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool = True, ) -> int | None: @@ -547,7 +547,7 @@ def count_if( @overload def none( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -555,14 +555,14 @@ def none( @overload def none( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool, ) -> bool | None: ... 
def none( self, - predicate: Callable[[Cell[T_co]], Cell[bool]], + predicate: Callable[[Cell[T_co]], _BooleanCell], *, ignore_unknown: bool = True, ) -> bool | None: diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index 0e1703c73..e4f850537 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -1,20 +1,21 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, TypeVar from safeds._utils import _structural_hash -from ._cell import Cell +from ._cell import Cell, _unwrap if TYPE_CHECKING: import polars as pl + from safeds._typing import _BooleanCell, _ConvertibleToBooleanCell, _ConvertibleToCell + from safeds.data.tabular.typing import ColumnType + from ._string_cell import StringCell from ._temporal_cell import TemporalCell T = TypeVar("T") -P = TypeVar("P") -R = TypeVar("R") class _LazyCell(Cell[T]): @@ -33,156 +34,148 @@ def __init__(self, expression: pl.Expr) -> None: # "Boolean" operators (actually bitwise) ----------------------------------- - def __invert__(self) -> Cell[bool]: + def __invert__(self) -> _BooleanCell: import polars as pl return _wrap(self._expression.cast(pl.Boolean).__invert__()) - def __and__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__and__(other)) - def __rand__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__rand__(other)) - def __or__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__or__(other)) - def __ror__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __ror__(self, 
other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__ror__(other)) - def __xor__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__xor__(other)) - def __rxor__(self, other: bool | Cell[bool]) -> Cell[bool]: + def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__rxor__(other)) # Comparison --------------------------------------------------------------- - def __eq__(self, other: object) -> Cell[bool]: # type: ignore[override] + def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] other = _unwrap(other) - return _wrap(self._expression.eq_missing(other)) + return _wrap(self._expression.__eq__(other)) - def __ge__(self, other: Any) -> Cell[bool]: + def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__ge__(other)) - def __gt__(self, other: Any) -> Cell[bool]: + def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__gt__(other)) - def __le__(self, other: Any) -> Cell[bool]: + def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__le__(other)) - def __lt__(self, other: Any) -> Cell[bool]: + def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: other = _unwrap(other) return _wrap(self._expression.__lt__(other)) - def __ne__(self, other: object) -> Cell[bool]: # type: ignore[override] + def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] other = _unwrap(other) - return _wrap(self._expression.ne_missing(other)) + return _wrap(self._expression.__ne__(other)) # Numeric operators -------------------------------------------------------- - def __abs__(self) -> Cell[R]: + def 
__abs__(self) -> Cell: return _wrap(self._expression.__abs__()) - def __ceil__(self) -> Cell[R]: - import polars as pl - - # polars does not yet implement floor for integers - return _wrap(self._expression.cast(pl.Float64).ceil()) - - def __floor__(self) -> Cell[R]: - import polars as pl + def __ceil__(self) -> Cell: + return _wrap(self._expression.ceil()) - # polars does not yet implement floor for integers - return _wrap(self._expression.cast(pl.Float64).floor()) + def __floor__(self) -> Cell: + return _wrap(self._expression.floor()) - def __neg__(self) -> Cell[R]: + def __neg__(self) -> Cell: return _wrap(self._expression.__neg__()) - def __pos__(self) -> Cell[R]: + def __pos__(self) -> Cell: return _wrap(self._expression.__pos__()) - def __add__(self, other: Any) -> Cell[R]: + def __add__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__add__(other)) - def __radd__(self, other: Any) -> Cell[R]: + def __radd__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__radd__(other)) - def __floordiv__(self, other: Any) -> Cell[R]: + def __floordiv__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__floordiv__(other)) - def __rfloordiv__(self, other: Any) -> Cell[R]: + def __rfloordiv__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rfloordiv__(other)) - def __mod__(self, other: Any) -> Cell[R]: + def __mod__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__mod__(other)) - def __rmod__(self, other: Any) -> Cell[R]: + def __rmod__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rmod__(other)) - def __mul__(self, other: Any) -> Cell[R]: + def __mul__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__mul__(other)) - def __rmul__(self, other: 
Any) -> Cell[R]: + def __rmul__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rmul__(other)) - def __pow__(self, other: float | Cell[P]) -> Cell[R]: + def __pow__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__pow__(other)) - def __rpow__(self, other: float | Cell[P]) -> Cell[R]: + def __rpow__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rpow__(other)) - def __sub__(self, other: Any) -> Cell[R]: + def __sub__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__sub__(other)) - def __rsub__(self, other: Any) -> Cell[R]: + def __rsub__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rsub__(other)) - def __truediv__(self, other: Any) -> Cell[R]: + def __truediv__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__truediv__(other)) - def __rtruediv__(self, other: Any) -> Cell[R]: + def __rtruediv__(self, other: _ConvertibleToCell) -> Cell: other = _unwrap(other) return _wrap(self._expression.__rtruediv__(other)) - # String representation ---------------------------------------------------- - - def __repr__(self) -> str: - return self._expression.__repr__() - - def __str__(self) -> str: - return self._expression.__str__() - # Other -------------------------------------------------------------------- def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) + def __repr__(self) -> str: + return self._expression.__repr__() + def __sizeof__(self) -> int: return self._expression.__sizeof__() + def __str__(self) -> str: + return self._expression.__str__() + # ------------------------------------------------------------------------------------------------------------------ # Properties # 
------------------------------------------------------------------------------------------------------------------ @@ -199,6 +192,32 @@ def dt(self) -> TemporalCell: return _LazyTemporalCell(self._expression) + # ------------------------------------------------------------------------------------------------------------------ + # Comparison operations + # ------------------------------------------------------------------------------------------------------------------ + def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: + other = _unwrap(other) + + if propagate_missing_values: + return _wrap(self._expression.eq(other)) + else: + return _wrap(self._expression.eq_missing(other)) + + def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: + other = _unwrap(other) + + if propagate_missing_values: + return _wrap(self._expression.ne(other)) + else: + return _wrap(self._expression.ne_missing(other)) + + # ------------------------------------------------------------------------------------------------------------------ + # Other + # ------------------------------------------------------------------------------------------------------------------ + + def cast(self, type_: ColumnType) -> Cell: + return _wrap(self._expression.cast(type_._polars_data_type)) + # ------------------------------------------------------------------------------------------------------------------ # Internal # ------------------------------------------------------------------------------------------------------------------ @@ -212,14 +231,8 @@ def _equals(self, other: object) -> bool: return NotImplemented if self is other: return True - return self._expression.meta.eq(other._expression.meta) - - -def _wrap(other: pl.Expr) -> Any: - return _LazyCell(other) + return self._expression.meta.eq(other._expression) -def _unwrap(other: Any) -> Any: - if isinstance(other, _LazyCell): - return other._expression - return 
other +def _wrap(expression: pl.Expr) -> Cell: + return _LazyCell(expression) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 0e8df2ef7..e3c23f0f9 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -4,7 +4,12 @@ from safeds._config import _get_device, _init_default_device from safeds._config._polars import _get_polars_config -from safeds._utils import _compute_duplicates, _structural_hash +from safeds._utils import ( + _compute_duplicates, + _safe_collect_lazy_frame, + _safe_collect_lazy_frame_schema, + _structural_hash, +) from safeds._validation import ( _check_bounds, _check_columns_dont_exist, @@ -35,6 +40,7 @@ from torch import Tensor from torch.utils.data import DataLoader, Dataset + from safeds._typing import _BooleanCell from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.transformation import ( InvertibleTableTransformer, @@ -388,7 +394,7 @@ def __str__(self) -> str: @property def _data_frame(self) -> pl.DataFrame: if self.__data_frame_cache is None: - self.__data_frame_cache = self._lazy_frame.collect() + self.__data_frame_cache = _safe_collect_lazy_frame(self._lazy_frame) return self.__data_frame_cache @@ -470,7 +476,9 @@ def schema(self) -> Schema: 'b': int64 }) """ - return Schema._from_polars_schema(self._lazy_frame.collect_schema()) + return Schema._from_polars_schema( + _safe_collect_lazy_frame_schema(self._lazy_frame), + ) # ------------------------------------------------------------------------------------------------------------------ # Column operations @@ -716,7 +724,7 @@ def get_column(self, name: str) -> Column: """ _check_columns_exist(self, name) return Column._from_polars_series( - self._lazy_frame.select(name).collect().get_column(name), + _safe_collect_lazy_frame(self._lazy_frame.select(name)).get_column(name), ) def get_column_type(self, name: str) -> ColumnType: @@ -1272,7 +1280,7 
@@ def transform_columns( @overload def count_rows_if( self, - predicate: Callable[[Row], Cell[bool]], + predicate: Callable[[Row], _BooleanCell], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -1280,14 +1288,14 @@ def count_rows_if( @overload def count_rows_if( self, - predicate: Callable[[Row], Cell[bool]], + predicate: Callable[[Row], _BooleanCell], *, ignore_unknown: bool, ) -> int | None: ... def count_rows_if( self, - predicate: Callable[[Row], Cell[bool]], + predicate: Callable[[Row], _BooleanCell], *, ignore_unknown: bool = True, ) -> int | None: @@ -1329,7 +1337,7 @@ def count_rows_if( None """ expression = predicate(_LazyVectorizedRow(self))._polars_expression - series = self._lazy_frame.select(expression.alias("count")).collect().get_column("count") + series = _safe_collect_lazy_frame(self._lazy_frame.select(expression.alias("count"))).get_column("count") if ignore_unknown or series.null_count() == 0: return series.sum() @@ -1338,7 +1346,7 @@ def count_rows_if( def filter_rows( self, - predicate: Callable[[Row], Cell[bool]], + predicate: Callable[[Row], _BooleanCell], ) -> Table: """ Keep only rows that satisfy a condition and return the result as a new table. @@ -1385,7 +1393,7 @@ def filter_rows( def filter_rows_by_column( self, name: str, - predicate: Callable[[Cell], Cell[bool]], + predicate: Callable[[Cell], _BooleanCell], ) -> Table: """ Keep only rows that satisfy a condition on a specific column and return the result as a new table. @@ -1480,7 +1488,7 @@ def remove_duplicate_rows(self) -> Table: def remove_rows( self, - predicate: Callable[[Row], Cell[bool]], + predicate: Callable[[Row], _BooleanCell], ) -> Table: """ Remove rows that satisfy a condition and return the result as a new table. 
@@ -1532,7 +1540,7 @@ def remove_rows( def remove_rows_by_column( self, name: str, - predicate: Callable[[Cell], Cell[bool]], + predicate: Callable[[Cell], _BooleanCell], ) -> Table: """ Remove rows that satisfy a condition on a specific column and return the result as a new table. @@ -1747,14 +1755,17 @@ def remove_rows_with_outliers( # polar's `all_horizontal` raises a `ComputeError` if there are no columns selected = self._lazy_frame.select(cs.numeric() & cs.by_name(selector)) - if not selected.collect_schema().names(): + selected_names = _safe_collect_lazy_frame_schema(selected).names() + if not selected_names: return self # Multiply z-score by standard deviation instead of dividing the distance by it, to avoid division by zero non_outlier_mask = pl.all_horizontal( - selected.select( - pl.all().is_null() | ((pl.all() - pl.all().mean()).abs() <= (z_score_threshold * pl.all().std())), - ).collect(), + _safe_collect_lazy_frame( + selected.select( + pl.all().is_null() | ((pl.all() - pl.all().mean()).abs() <= (z_score_threshold * pl.all().std())), + ), + ), ) return Table._from_polars_lazy_frame( @@ -2360,7 +2371,7 @@ def join( # Can be removed once https://github.com/pola-rs/polars/issues/20670 is fixed if mode == "right" and len(left_names) > 1: # We must collect because of https://github.com/pola-rs/polars/issues/20671 - result = result.collect().drop(left_names).lazy() + result = _safe_collect_lazy_frame(result).drop(left_names).lazy() return self._from_polars_lazy_frame( result, @@ -2495,7 +2506,7 @@ def summarize_statistics(self) -> Table: # Compute suitable types for the output columns frame = self._lazy_frame - schema = frame.collect_schema() + schema = _safe_collect_lazy_frame_schema(frame) for name, type_ in schema.items(): # polars fails to determine supertype of temporal types and u32 if not type_.is_numeric() and not type_.is_(pl.Null): diff --git a/src/safeds/data/tabular/plotting/_table_plotter.py b/src/safeds/data/tabular/plotting/_table_plotter.py 
index ba9f53825..3eee20bbf 100644 --- a/src/safeds/data/tabular/plotting/_table_plotter.py +++ b/src/safeds/data/tabular/plotting/_table_plotter.py @@ -3,7 +3,7 @@ import warnings from typing import TYPE_CHECKING -from safeds._utils import _figure_to_image +from safeds._utils import _figure_to_image, _safe_collect_lazy_frame from safeds._validation import _check_bounds, _check_columns_are_numeric, _check_columns_exist, _ClosedBound from safeds.exceptions import ColumnTypeError, NonNumericColumnError @@ -380,7 +380,8 @@ def line_plot( agg_list.append(pl.col(name).mean().alias(f"{name}_mean")) agg_list.append(pl.count(name).alias(f"{name}_count")) agg_list.append(pl.std(name, ddof=0).alias(f"{name}_std")) - grouped = self._table._lazy_frame.sort(x_name).group_by(x_name, maintain_order=True).agg(agg_list).collect() + grouped_lazy = self._table._lazy_frame.sort(x_name).group_by(x_name, maintain_order=True).agg(agg_list) + grouped = _safe_collect_lazy_frame(grouped_lazy) x = grouped.get_column(x_name) y_s = [] @@ -575,8 +576,8 @@ def moving_average_plot( # Calculate the moving average mean_col = pl.col(y_name).mean().alias(y_name) - grouped = self._table._lazy_frame.sort(x_name).group_by(x_name, maintain_order=True).agg(mean_col).collect() - data = grouped + grouped_lazy = self._table._lazy_frame.sort(x_name).group_by(x_name, maintain_order=True).agg(mean_col) + data = _safe_collect_lazy_frame(grouped_lazy) moving_average = data.select([pl.col(y_name).rolling_mean(window_size).alias("moving_average")]) # set up the arrays for plotting y_data_with_nan = moving_average["moving_average"].to_numpy() diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index b025939ba..54def0379 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from safeds._utils import _structural_hash 
+from safeds._utils import _safe_collect_lazy_frame, _structural_hash from safeds._validation import _check_columns_are_numeric, _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NotFittedError @@ -117,8 +117,8 @@ def fit(self, table: Table) -> RangeScaler: raise ValueError("The RangeScaler cannot be fitted because the table contains 0 rows") # Learn the transformation - _data_min = table._lazy_frame.select(column_names).min().collect() - _data_max = table._lazy_frame.select(column_names).max().collect() + _data_min = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).min()) + _data_max = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).max()) # Create a copy with the learned transformation result = RangeScaler(min_=self._min, max_=self._max, selector=column_names) diff --git a/src/safeds/data/tabular/transformation/_robust_scaler.py b/src/safeds/data/tabular/transformation/_robust_scaler.py index a0565c72f..9d390872b 100644 --- a/src/safeds/data/tabular/transformation/_robust_scaler.py +++ b/src/safeds/data/tabular/transformation/_robust_scaler.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _safe_collect_lazy_frame from safeds._validation import _check_columns_are_numeric, _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NotFittedError @@ -90,9 +91,9 @@ def fit(self, table: Table) -> RobustScaler: if table.row_count == 0: raise ValueError("The RobustScaler cannot be fitted because the table contains 0 rows") - _data_median = table._lazy_frame.select(column_names).median().collect() - q1 = table._lazy_frame.select(column_names).quantile(0.25).collect() - q3 = table._lazy_frame.select(column_names).quantile(0.75).collect() + _data_median = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).median()) + q1 = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.25)) + q3 = 
_safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.75)) _data_scale = q3 - q1 # To make sure there is no division by zero diff --git a/src/safeds/data/tabular/transformation/_simple_imputer.py b/src/safeds/data/tabular/transformation/_simple_imputer.py index 41639677d..c152c2d40 100644 --- a/src/safeds/data/tabular/transformation/_simple_imputer.py +++ b/src/safeds/data/tabular/transformation/_simple_imputer.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod from typing import Any -from safeds._utils import _structural_hash +from safeds._utils import _safe_collect_lazy_frame, _structural_hash from safeds._validation import _check_columns_are_numeric, _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NotFittedError @@ -276,7 +276,7 @@ def __str__(self) -> str: return "Mean" def _get_replacement(self, table: Table) -> dict[str, Any]: - return table._lazy_frame.mean().collect().to_dict() + return _safe_collect_lazy_frame(table._lazy_frame.mean()).to_dict() class _Median(SimpleImputer.Strategy): @@ -292,7 +292,7 @@ def __str__(self) -> str: return "Median" def _get_replacement(self, table: Table) -> dict[str, Any]: - return table._lazy_frame.median().collect().to_dict() + return _safe_collect_lazy_frame(table._lazy_frame.median()).to_dict() class _Mode(SimpleImputer.Strategy): diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 5db98dade..8e71cf0ce 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _safe_collect_lazy_frame from safeds._validation import _check_columns_are_numeric, _check_columns_exist from safeds.data.tabular.containers import Table from safeds.exceptions import NotFittedError @@ -86,8 +87,8 @@ def fit(self, table: Table) -> 
StandardScaler: raise ValueError("The StandardScaler cannot be fitted because the table contains 0 rows") # Learn the transformation (ddof=0 is used to match the behavior of scikit-learn) - _data_mean = table._lazy_frame.select(column_names).mean().collect() - _data_standard_deviation = table._lazy_frame.select(column_names).std(ddof=0).collect() + _data_mean = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).mean()) + _data_standard_deviation = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).std(ddof=0)) # Create a copy with the learned transformation result = StandardScaler(selector=column_names) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index f6dfaf87c..578cc4ddc 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -56,6 +56,10 @@ class IndexOutOfBoundsError(IndexError): """Raised when trying to access an invalid index.""" +class LazyComputationError(SafeDsError, RuntimeError): + """Raised when a lazy computation fails.""" + + class LengthMismatchError(SafeDsError, ValueError): """Raised when objects have different lengths.""" @@ -93,6 +97,7 @@ class SchemaError(SafeDsError, TypeError): "DuplicateColumnError", "FileExtensionError", "IndexOutOfBoundsError", + "LazyComputationError", "LengthMismatchError", "MissingValuesError", "NotFittedError", diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 24b79b286..ddad13ead 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -2,7 +2,7 @@ assert_cell_operation_works, assert_row_operation_works, assert_tables_are_equal, - assert_that_tabular_datasets_are_equal, + assert_tabular_datasets_are_equal, ) from ._devices import ( configure_test_with_device, @@ -41,7 +41,7 @@ "assert_cell_operation_works", "assert_row_operation_works", "assert_tables_are_equal", - "assert_that_tabular_datasets_are_equal", + "assert_tabular_datasets_are_equal", "configure_test_with_device", 
"device_cpu", "device_cuda", diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 0a0434ed5..2f7397f9f 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -5,6 +5,7 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Cell, Column, Row, Table +from safeds.data.tabular.typing import ColumnType def assert_tables_are_equal( @@ -44,7 +45,7 @@ def assert_tables_are_equal( ) -def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None: +def assert_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None: """ Assert that two tabular datasets are equal. @@ -65,6 +66,8 @@ def assert_cell_operation_works( value: Any, transformer: Callable[[Cell], Cell], expected: Any, + *, + type_if_none: ColumnType | None = None, ) -> None: """ Assert that a cell operation works as expected. @@ -77,11 +80,14 @@ def assert_cell_operation_works( The transformer to apply to the cells. expected: The expected value of the transformed cell. + type_if_none: + The type of the column if the value is `None`. """ - column = Column("A", [value]) + type_ = type_if_none if value is None else None + column = Column("A", [value], type_=type_) transformed_column = column.transform(transformer) actual = transformed_column[0] - assert actual == expected + assert actual == expected, f"Expected {expected}, but got {actual}." 
def assert_row_operation_works( diff --git a/tests/helpers/_devices.py b/tests/helpers/_devices.py index 3c4d4ce9a..7d2bc0803 100644 --- a/tests/helpers/_devices.py +++ b/tests/helpers/_devices.py @@ -19,10 +19,10 @@ def get_devices_ids() -> list[str]: def configure_test_with_device(device: Device) -> None: - _skip_if_device_not_available(device) # This will end the function if device is not available + skip_if_device_not_available(device) # This will end the function if device is not available _set_default_device(device) -def _skip_if_device_not_available(device: Device) -> None: +def skip_if_device_not_available(device: Device) -> None: if device == device_cuda and not torch.cuda.is_available(): - pytest.skip("This test requires cuda") + pytest.skip("This test requires CUDA.") diff --git a/tests/safeds/_config/test_torch.py b/tests/safeds/_config/test_torch.py index 9b993df74..0bc8b988b 100644 --- a/tests/safeds/_config/test_torch.py +++ b/tests/safeds/_config/test_torch.py @@ -4,7 +4,7 @@ from safeds._config import _get_device, _init_default_device, _set_default_device from tests.helpers import configure_test_with_device, device_cpu, device_cuda, get_devices, get_devices_ids -from tests.helpers._devices import _skip_if_device_not_available +from tests.helpers._devices import skip_if_device_not_available @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) @@ -16,7 +16,7 @@ def test_default_device(device: Device) -> None: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) def test_set_default_device(device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) _set_default_device(device) assert _get_device().type == device.type assert torch.get_default_device().type == device.type diff --git a/tests/safeds/_utils/test_lazy.py b/tests/safeds/_utils/test_lazy.py new file mode 100644 index 000000000..0cba7d88f --- /dev/null +++ b/tests/safeds/_utils/test_lazy.py @@ -0,0 +1,17 @@ 
+import polars as pl +import pytest + +from safeds._utils import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema +from safeds.exceptions import LazyComputationError + + +def test_safe_collect_lazy_frame() -> None: + frame = pl.LazyFrame().select("a") + with pytest.raises(LazyComputationError): + _safe_collect_lazy_frame(frame) + + +def test_safe_collect_lazy_frame_schema() -> None: + frame = pl.LazyFrame().select("a") + with pytest.raises(LazyComputationError): + _safe_collect_lazy_frame_schema(frame) diff --git a/tests/safeds/data/tabular/containers/_column/test_eq.py b/tests/safeds/data/tabular/containers/_column/test_eq.py index 1136a2c81..566d72a52 100644 --- a/tests/safeds/data/tabular/containers/_column/test_eq.py +++ b/tests/safeds/data/tabular/containers/_column/test_eq.py @@ -93,7 +93,7 @@ def test_should_return_true_if_objects_are_identical(column: Column) -> None: ("column", "other"), [ (Column("col1", []), None), - (Column("col1", []), Cell.from_literal(1)), + (Column("col1", []), Cell.constant(1)), ], ids=[ "Column vs. 
None", diff --git a/tests/safeds/data/tabular/containers/_column/test_transform.py b/tests/safeds/data/tabular/containers/_column/test_transform.py index 85821c849..e17ceb4c9 100644 --- a/tests/safeds/data/tabular/containers/_column/test_transform.py +++ b/tests/safeds/data/tabular/containers/_column/test_transform.py @@ -10,7 +10,7 @@ [ ( lambda: Column("col1", []), - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Column("col1", []), ), ( @@ -20,7 +20,7 @@ ), ( lambda: Column("col1", [1, 2, 3]), - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Column("col1", [None, None, None]), ), ( diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/containers/_lazy_cell/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..34f48031a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/__snapshots__/test_hash.ambr @@ -0,0 +1,13 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[constant] + 4610312201483200147 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[date, column] + 740357503917492401 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[date, int] + 495023986348121879 +# --- diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py index a01749e32..7cc40c878 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,6 +13,7 @@ (10.5, 10.5), (-10, 10), (-10.5, 10.5), + (None, None), ], ids=[ "zero int", @@ -20,11 +22,12 @@ 
"positive float", "negative int", "negative float", + "None", ], ) -class TestShouldReturnAbsoluteValueOfCell: - def test_dunder_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: abs(cell), expected) +class TestShouldReturnAbsoluteValue: + def test_dunder_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: abs(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: cell.abs(), expected) + def test_named_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.abs(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_add.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_add.py index 9a2d59a39..f760cbe2b 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_add.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_add.py @@ -12,29 +12,53 @@ (3, 1.5, 4.5), (1.5, 3, 4.5), (1.5, 1.5, 3.0), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeAddition: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell + value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell + _LazyCell(pl.lit(value2)), expected) - def 
test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 + cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) + cell, expected) - def test_named_method(self, value1: float, value2: float, expected: float) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.add(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.add(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_and.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_and.py index ffbe3786f..0d3a08c29 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_and.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_and.py @@ -4,6 +4,7 @@ import pytest from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,39 +13,69 @@ [ (False, False, False), (False, True, False), + (False, None, False), (True, False, False), (True, True, True), + (True, None, None), + (None, False, False), + (None, True, None), + (None, None, None), (0, 
False, False), (0, True, False), (1, False, False), (1, True, True), ], ids=[ - "false - false", - "false - true", - "true - false", - "true - true", - "falsy int - false", - "falsy int - true", - "truthy int - false", - "truthy int - true", + "False - False", + "False - True", + "False - None", + "True - False", + "True - True", + "True - None", + "None - False", + "None - True", + "None - None", + "falsy int - False", + "falsy int - True", + "truthy int - False", + "truthy int - True", ], ) class TestShouldComputeConjunction: - def test_dunder_method(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell & value2, expected) + def test_dunder_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell & value2, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell & _LazyCell(pl.lit(value2)), expected) + def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell & _LazyCell(pl.lit(value2)), + expected, + type_if_none=ColumnType.boolean(), + ) - def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: value1 & cell, expected) + def test_dunder_method_inverted_order(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value2, lambda cell: value1 & cell, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) & cell, expected) + def 
test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: Any, + value2: bool | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value2, + lambda cell: _LazyCell(pl.lit(value1)) & cell, + expected, + type_if_none=ColumnType.boolean(), + ) - def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.and_(value2), expected) + def test_named_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell.and_(value2), expected, type_if_none=ColumnType.boolean()) - def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.and_(_LazyCell(pl.lit(value2))), expected) + def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.and_(_LazyCell(pl.lit(value2))), + expected, + type_if_none=ColumnType.boolean(), + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_cast.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_cast.py new file mode 100644 index 000000000..37528c696 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_cast.py @@ -0,0 +1,23 @@ +from typing import Any + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "type_", "expected"), + [ + (1, ColumnType.string(), "1"), + ("1", ColumnType.int64(), 1), + (None, ColumnType.int64(), None), + ], + ids=[ + "int64 to string", + "string to int64", + "None to int64", + ], +) +def test_should_cast_values_to_requested_type(value: Any, type_: ColumnType, expected: Any) -> None: + assert_cell_operation_works(value, lambda cell: cell.cast(type_), expected) diff --git 
a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py index b1cd6bed0..32dce3d20 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py @@ -2,6 +2,7 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -14,6 +15,7 @@ (10.5, 11), (-10, -10), (-10.5, -10), + (None, None), ], ids=[ "zero int", @@ -22,11 +24,12 @@ "positive float", "negative int", "negative float", + "None", ], ) -class TestShouldReturnCeilOfCell: - def test_dunder_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected) +class TestShouldReturnCeiling: + def test_dunder_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: cell.ceil(), expected) + def test_named_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.ceil(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py new file mode 100644 index 000000000..1be74f34a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_constant.py @@ -0,0 +1,21 @@ +from typing import Any + +import pytest + +from safeds.data.tabular.containers import Cell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + "value", + [ + None, + 1, + ], + ids=[ + "None", + "int", + ], +) +def test_should_return_constant_value(value: Any) -> None: + assert_cell_operation_works(None, lambda 
_: Cell.constant(value), value) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_date.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_date.py new file mode 100644 index 000000000..e052f1e91 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_date.py @@ -0,0 +1,44 @@ +from datetime import date + +import pytest + +from safeds._typing import _ConvertibleToIntCell +from safeds.data.tabular.containers import Cell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("year", "month", "day", "expected"), + [ + (1, 2, 3, date(1, 2, 3)), + (Cell.constant(1), Cell.constant(2), Cell.constant(3), date(1, 2, 3)), + # invalid year + (None, 2, 3, None), + # invalid month + (1, None, 3, None), + (1, 0, 3, None), + (1, 13, 3, None), + # invalid day + (1, 2, None, None), + (1, 2, 0, None), + (1, 2, 32, None), + ], + ids=[ + "int components", + "cell components", + "year is None", + "month is None", + "month is too low", + "month is too high", + "day is None", + "day is too low", + "day is too high", + ], +) +def test_should_return_date( + year: _ConvertibleToIntCell, + month: _ConvertibleToIntCell, + day: _ConvertibleToIntCell, + expected: date, +) -> None: + assert_cell_operation_works(None, lambda _: Cell.date(year, month, day), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py new file mode 100644 index 000000000..dcab1d935 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py @@ -0,0 +1,106 @@ +from datetime import datetime + +import pytest + +from safeds._typing import _ConvertibleToIntCell +from safeds.data.tabular.containers import Cell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("year", "month", "day", "hour", "minute", "second", "microsecond", "expected"), + [ + ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + datetime(1, 2, 
3, 4, 5, 6, 7), # noqa: DTZ001 + ), + ( + Cell.constant(1), + Cell.constant(2), + Cell.constant(3), + Cell.constant(4), + Cell.constant(5), + Cell.constant(6), + Cell.constant(7), + datetime(1, 2, 3, 4, 5, 6, 7), # noqa: DTZ001 + ), + # invalid year + (None, 2, 3, 4, 5, 6, 7, None), + # invalid month + (1, None, 3, 4, 5, 6, 7, None), + (1, 0, 3, 4, 5, 6, 7, None), + (1, 13, 3, 4, 5, 6, 7, None), + # invalid day + (1, 2, None, 4, 5, 6, 7, None), + (1, 2, 0, 4, 5, 6, 7, None), + (1, 2, 32, 4, 5, 6, 7, None), + # invalid hour + (1, 2, 3, None, 5, 6, 7, None), + (1, 2, 3, -1, 5, 6, 7, None), + (1, 2, 3, 24, 5, 6, 7, None), + # invalid minute + (1, 2, 3, 4, None, 6, 7, None), + (1, 2, 3, 4, -1, 6, 7, None), + (1, 2, 3, 4, 60, 6, 7, None), + # invalid second + (1, 2, 3, 4, 5, None, 7, None), + (1, 2, 3, 4, 5, -1, 7, None), + (1, 2, 3, 4, 5, 60, 7, None), + # invalid microsecond + (1, 2, 3, 4, 5, 6, None, None), + (1, 2, 3, 4, 5, 6, -1, None), + (1, 2, 3, 4, 5, 6, 1_000_000, None), + ], + ids=[ + "int components", + "cell components", + "year is None", + "month is None", + "month is too low", + "month is too high", + "day is None", + "day is too low", + "day is too high", + "hour is None", + "hour is too low", + "hour is too high", + "minute is None", + "minute is too low", + "minute is too high", + "second is None", + "second is too low", + "second is too high", + "microsecond is None", + "microsecond is too low", + "microsecond is too high", + ], +) +def test_should_return_datetime( + year: _ConvertibleToIntCell, + month: _ConvertibleToIntCell, + day: _ConvertibleToIntCell, + hour: _ConvertibleToIntCell, + minute: _ConvertibleToIntCell, + second: _ConvertibleToIntCell, + microsecond: _ConvertibleToIntCell, + expected: datetime, +) -> None: + assert_cell_operation_works( + None, + lambda _: Cell.datetime( + year, + month, + day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + expected, + ) diff --git 
a/tests/safeds/data/tabular/containers/_lazy_cell/test_div.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_div.py index 1aa8ab692..8501a4f65 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_div.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_div.py @@ -12,29 +12,53 @@ (3, 1.5, 2.0), (1.5, 3, 0.5), (1.5, 1.5, 1.0), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeDivision: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell / value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell / _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 / cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) / cell, expected) - def test_named_method(self, value1: float, value2: float, expected: float) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) 
-> None: assert_cell_operation_works(value1, lambda cell: cell.div(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.div(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_duration.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_duration.py new file mode 100644 index 000000000..1d0784353 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_duration.py @@ -0,0 +1,86 @@ +from datetime import timedelta + +import pytest + +from safeds._typing import _ConvertibleToIntCell +from safeds.data.tabular.containers import Cell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "expected"), + [ + ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + timedelta(weeks=1, days=2, hours=3, minutes=4, seconds=5, milliseconds=6, microseconds=7), + ), + ( + -1, + -2, + -3, + -4, + -5, + -6, + -7, + timedelta(weeks=-1, days=-2, hours=-3, minutes=-4, seconds=-5, milliseconds=-6, microseconds=-7), + ), + ( + Cell.constant(1), + Cell.constant(2), + Cell.constant(3), + Cell.constant(4), + Cell.constant(5), + Cell.constant(6), + Cell.constant(7), + timedelta(weeks=1, days=2, hours=3, minutes=4, seconds=5, milliseconds=6, microseconds=7), + ), + (None, 2, 3, 4, 5, 6, 7, None), + (1, None, 3, 4, 5, 6, 7, None), + (1, 2, None, 4, 5, 6, 7, None), + (1, 2, 3, None, 5, 6, 7, None), + (1, 2, 3, 4, None, 6, 7, None), + (1, 2, 3, 4, 5, None, 7, None), + (1, 2, 3, 4, 5, 6, None, None), + ], + ids=[ + "positive int components", + "negative int components", + "cell components", + "weeks is None", + "days is None", + "hours is None", + "minutes is None", + "seconds 
is None", + "milliseconds is None", + "microseconds is None", + ], +) +def test_should_return_duration( + weeks: _ConvertibleToIntCell, + days: _ConvertibleToIntCell, + hours: _ConvertibleToIntCell, + minutes: _ConvertibleToIntCell, + seconds: _ConvertibleToIntCell, + milliseconds: _ConvertibleToIntCell, + microseconds: _ConvertibleToIntCell, + expected: timedelta, +) -> None: + assert_cell_operation_works( + None, + lambda _: Cell.duration( + weeks=weeks, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + milliseconds=milliseconds, + microseconds=microseconds, + ), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_eq.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_eq.py index 0cd8e0b02..19e82c373 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_eq.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_eq.py @@ -12,29 +12,88 @@ (3, 1.5, False), (1.5, 3, False), (1.5, 1.5, True), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeEquality: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell == value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell == _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None:
assert_cell_operation_works(value2, lambda cell: value1 == cell, expected) # type: ignore[arg-type,return-value] - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) == cell, expected) # type: ignore[arg-type,return-value] - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.eq(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.eq(_LazyCell(pl.lit(value2))), expected) + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (None, 3, False), + (3, None, False), + (None, None, True), + ], + ids=[ + "left is None", + "right is None", + "both are None", + ], +) +class TestShouldComputeEqualityWithoutPropagatingMissingValues: + def test_named_method( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: cell.eq(value2, propagate_missing_values=False), expected) + + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.eq(_LazyCell(pl.lit(value2)), propagate_missing_values=False), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py 
b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py index e5f1c01eb..023cdf615 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py @@ -3,40 +3,120 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell, Table +from safeds.data.tabular.containers import Cell, Column from safeds.data.tabular.containers._lazy_cell import _LazyCell @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("cell_1", "cell_2", "expected"), [ - (_LazyCell(pl.col("a")), _LazyCell(pl.col("a")), True), - (_LazyCell(pl.col("a")), _LazyCell(pl.col("b")), False), + # equal (constant) + ( + Cell.constant(1), + Cell.constant(1), + True, + ), + # equal (date, int) + ( + Cell.date(2025, 1, 15), + Cell.date(2025, 1, 15), + True, + ), + # equal (date, column) + ( + Cell.date(_LazyCell(pl.col("a")), 1, 15), + Cell.date(_LazyCell(pl.col("a")), 1, 15), + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")), + _LazyCell(pl.col("a")), + True, + ), + # not equal (different constant value) + ( + Cell.constant(1), + Cell.constant(2), + False, + ), + # not equal (different constant type) + ( + Cell.constant(1), + Cell.constant("1"), + False, + ), + # not equal (different date, int) + ( + Cell.date(2025, 1, 15), + Cell.date(2024, 1, 15), + False, + ), + # not equal (different date, column) + ( + Cell.date(_LazyCell(pl.col("a")), 1, 15), + Cell.date(_LazyCell(pl.col("b")), 1, 15), + False, + ), + # not equal (different column) + ( + _LazyCell(pl.col("a")), + _LazyCell(pl.col("b")), + False, + ), + # not equal (different cell kinds) + ( + Cell.date(23, 1, 15), + Cell.time(23, 1, 15), + False, + ), ], ids=[ - "equal", - "different", + # Equal + "equal (constant)", + "equal (date, int)", + "equal (date, column)", + "equal (column)", + # Not equal + "not equal (different constant value)", + "not equal (different constant type)", + "not equal (different date, int)", + 
"not equal (different date, column)", + "not equal (different column)", + "not equal (different cell kinds)", ], ) -def test_should_return_whether_two_cells_are_equal(cell1: Cell, cell2: Cell, expected: bool) -> None: - assert (cell1._equals(cell2)) == expected +def test_should_return_whether_objects_are_equal(cell_1: Cell, cell_2: Cell, expected: bool) -> None: + assert (cell_1._equals(cell_2)) == expected -def test_should_return_true_if_objects_are_identical() -> None: - cell: Cell[Any] = _LazyCell(pl.col("a")) +@pytest.mark.parametrize( + "cell", + [ + Cell.constant(1), + Cell.date(2025, 1, 15), + _LazyCell(pl.col("a")), + ], + ids=[ + "constant", + "date", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(cell: Cell) -> None: assert (cell._equals(cell)) is True @pytest.mark.parametrize( ("cell", "other"), [ - (_LazyCell(pl.col("a")), None), - (_LazyCell(pl.col("a")), Table({})), + (Cell.constant(1), None), + (Cell.constant(1), Column("col1", [1])), ], ids=[ "Cell vs. None", - "Cell vs. Table", + "Cell vs. 
Column", ], ) -def test_should_return_not_implemented_if_other_is_not_cell(cell: Cell, other: Any) -> None: +def test_should_return_not_implemented_if_other_has_different_type(cell: Cell, other: Any) -> None: assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_first_not_none.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_first_not_none.py index 91f38f4b5..77f5fea33 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_first_not_none.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_first_not_none.py @@ -1,58 +1,29 @@ -from datetime import date, time +from typing import Any -import polars as pl import pytest +from safeds.data.tabular.containers import Table from safeds.data.tabular.containers._cell import Cell -from safeds.data.tabular.containers._lazy_cell import _LazyCell +_none_cell = Cell.constant(None) -class TestFirstNotNone: - def test_should_return_none(self) -> None: - to_eval: list[Cell] = [_LazyCell(None) for i in range(5)] - res = Cell.first_not_none(to_eval) - assert res.eq(_LazyCell(None)) - @pytest.mark.parametrize( - ("list_of_cells", "expected"), - [ - ([_LazyCell(None), _LazyCell(1), _LazyCell(None), _LazyCell(4)], _LazyCell(1)), - ([_LazyCell(i) for i in range(5)], _LazyCell(1)), - ( - [ - _LazyCell(None), - _LazyCell(None), - _LazyCell(pl.lit("Hello, World!")), - _LazyCell(pl.lit("Not returned")), - ], - _LazyCell("Hello, World!"), - ), - ([_LazyCell(pl.lit(i)) for i in ["a", "b", "c", "d"]], _LazyCell(pl.lit("a"))), - ([_LazyCell(i) for i in [None, time(0, 0, 0, 0), None, time(1, 1, 1, 1)]], _LazyCell(time(0, 0, 0, 0))), - ( - [_LazyCell(i) for i in [time(0, 0, 0, 0), time(1, 1, 1, 1), time(2, 2, 2, 2), time(3, 3, 3, 3)]], - _LazyCell(time(0, 0, 0, 0)), - ), - ([_LazyCell(i) for i in [None, date(2000, 1, 1), date(1098, 3, 4), None]], _LazyCell(date(2000, 1, 1))), - ([_LazyCell(date(2000, 3, i)) for i in range(1, 5)], _LazyCell(date(2000, 3, 1))), - 
([_LazyCell(i) for i in [None, pl.lit("a"), 1, time(0, 0, 0, 0)]], _LazyCell(pl.lit("a"))), - ([_LazyCell(i) for i in [time(1, 1, 1, 1), 0, pl.lit("c"), date(2020, 1, 7)]], _LazyCell(time(1, 1, 1, 1))), - ([], _LazyCell(None)), - ], - ids=[ - "numeric_with_null", - "numeric_no_null", - "strings_with_null", - "strings_no_null", - "times_with_null", - "times_no_null", - "dates_with_null", - "dates_no_null", - "mixed_with_null", - "mixed_no_null", - "empty_list", - ], - ) - def test_should_return_first_non_none_value(self, list_of_cells: list[Cell], expected: Cell) -> None: - res = Cell.first_not_none(list_of_cells) - assert res.eq(expected) +@pytest.mark.parametrize( + ("cells", "expected"), + [ + ([], None), + ([_none_cell], None), + ([_none_cell, Cell.constant(1)], 1), + ([Cell.constant(1), _none_cell, Cell.constant(2)], 1), + ], + ids=[ + "empty", + "all None", + "one not None", + "multiple not None", + ], +) +def test_should_return_first_non_none_value(cells: list[Cell], expected: Any) -> None: + table = Table({"col1": [1]}) + actual = table.add_computed_column("col2", lambda _: Cell.first_not_none(cells)) + assert actual.get_column("col2").get_value(0) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py index 72590efa8..73ecf8e85 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py @@ -2,6 +2,7 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -14,6 +15,7 @@ (10.5, 10), (-10, -10), (-10.5, -11), + (None, None), ], ids=[ "zero int", @@ -22,11 +24,12 @@ "positive float", "negative int", "negative float", + "None", ], ) -class TestShouldReturnFloorOfCell: - def test_dunder_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: math.floor(cell), expected) 
+class TestShouldReturnFloor: + def test_dunder_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.floor(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: cell.floor(), expected) + def test_named_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.floor(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_floordiv.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_floordiv.py index e2b2316bb..dd9a3ea36 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_floordiv.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_floordiv.py @@ -12,23 +12,42 @@ (3, 1.6, 1), (1.5, 3, 0), (1.5, 1.4, 1), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) -class TestShouldComputeDivision: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: +class TestShouldComputeFlooredDivision: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell // value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell // _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, 
+ ) -> None: assert_cell_operation_works(value2, lambda cell: value1 // cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) // cell, expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_ge.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_ge.py index 2269066c7..b30bd15f1 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_ge.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_ge.py @@ -12,29 +12,53 @@ (3, 1.5, True), (1.5, 3, False), (1.5, 1.5, True), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeGreaterThanOrEqual: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell >= value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell >= _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 >= cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, 
value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) >= cell, expected) - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.ge(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.ge(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_gt.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_gt.py index 4c39978d3..3701ceae3 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_gt.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_gt.py @@ -12,29 +12,53 @@ (3, 1.5, True), (1.5, 3, False), (1.5, 1.5, False), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeGreaterThan: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell > value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell > 
_LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 > cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) > cell, expected) - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.gt(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.gt(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py index 6270f0e14..7c5a6d312 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py @@ -1,27 +1,85 @@ -from typing import Any +from collections.abc import Callable import polars as pl import pytest +from syrupy import SnapshotAssertion from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -def test_should_be_deterministic() -> None: - cell: Cell[Any] = _LazyCell(pl.col("a")) - assert hash(cell) == 
8162512882156938440 +@pytest.mark.parametrize( + "cell_factory", + [ + lambda: Cell.constant(1), + lambda: Cell.date(2025, 1, 15), + lambda: Cell.date(_LazyCell(pl.col("a")), 1, 15), + lambda: _LazyCell(pl.col("a")), + ], + ids=[ + "constant", + "date, int", + "date, column", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, cell_factory: Callable[[], Cell]) -> None: + cell_1 = cell_factory() + cell_2 = cell_factory() + assert hash(cell_1) == hash(cell_2) + + def test_should_return_same_hash_in_different_processes( + self, + cell_factory: Callable[[], Cell], + snapshot: SnapshotAssertion, + ) -> None: + cell = cell_factory() + assert hash(cell) == snapshot @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("cell_1", "cell_2"), [ - (_LazyCell(pl.col("a")), _LazyCell(pl.col("a")), True), - (_LazyCell(pl.col("a")), _LazyCell(pl.col("b")), False), + # different constant value + ( + Cell.constant(1), + Cell.constant(2), + ), + # different constant type + ( + Cell.constant(1), + Cell.constant("1"), + ), + # different date, int + ( + Cell.date(2025, 1, 15), + Cell.date(2024, 1, 15), + ), + # different date, column + ( + Cell.date(_LazyCell(pl.col("a")), 1, 15), + Cell.date(_LazyCell(pl.col("b")), 1, 15), + ), + # different column + ( + _LazyCell(pl.col("a")), + _LazyCell(pl.col("b")), + ), + # different cell kinds + ( + Cell.date(23, 1, 15), + Cell.time(23, 1, 15), + ), ], ids=[ - "equal", - "different", + "different constant value", + "different constant type", + "different date, int", + "different date, column", + "different column", + "different cell kinds", ], ) -def test_should_be_good_hash(cell1: Cell, cell2: Cell, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected +def test_should_be_good_hash(cell_1: Cell, cell_2: Cell) -> None: + assert hash(cell_1) != hash(cell_2) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_le.py 
b/tests/safeds/data/tabular/containers/_lazy_cell/test_le.py index 2f4e43806..939ecede3 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_le.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_le.py @@ -12,29 +12,53 @@ (3, 1.5, False), (1.5, 3, True), (1.5, 1.5, True), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeLessThanOrEqual: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell <= value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell <= _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 <= cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) <= cell, expected) - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: 
cell.le(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.le(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_lt.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_lt.py index 45280e2cd..6924d3d13 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_lt.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_lt.py @@ -12,29 +12,53 @@ (3, 1.5, False), (1.5, 3, True), (1.5, 1.5, False), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeLessThan: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell < value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell < _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 < cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, 
+ value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) < cell, expected) - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.lt(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.lt(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_mod.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_mod.py index f1d960e62..804b00e39 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_mod.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_mod.py @@ -12,29 +12,53 @@ (3, 1.5, 0.0), (1.5, 3, 1.5), (1.5, 1.5, 0.0), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeModulus: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell % value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell % _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: 
float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 % cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) % cell, expected) - def test_named_method(self, value1: float, value2: float, expected: float) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.mod(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.mod(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_mul.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_mul.py index 279522aa1..b7e56dbe8 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_mul.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_mul.py @@ -12,29 +12,53 @@ (3, 1.5, 4.5), (1.5, 3, 4.5), (1.5, 1.5, 2.25), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeMultiplication: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, 
lambda cell: cell * value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell * _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 * cell, expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) * cell, expected) - def test_named_method(self, value1: float, value2: float, expected: float) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.mul(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.mul(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_ne.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_ne.py deleted file mode 100644 index be55a9c41..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_ne.py +++ /dev/null @@ -1,40 +0,0 @@ -import polars as pl -import pytest - -from 
safeds.data.tabular.containers._lazy_cell import _LazyCell -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("value1", "value2", "expected"), - [ - (3, 3, False), - (3, 1.5, True), - (1.5, 3, True), - (1.5, 1.5, False), - ], - ids=[ - "int - int", - "int - float", - "float - int", - "float - float", - ], -) -class TestShouldComputeNegatedEquality: - def test_dunder_method(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell != value2, expected) - - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell != _LazyCell(pl.lit(value2)), expected) - - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: value2 != cell, expected) # type: ignore[arg-type,return-value] - - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: _LazyCell(pl.lit(value2)) != cell, expected) # type: ignore[arg-type,return-value] - - def test_named_method(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.neq(value2), expected) - - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.neq(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_neg.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_neg.py index 306fdd530..47ed35840 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_neg.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_neg.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import 
assert_cell_operation_works @@ -12,6 +13,7 @@ (10.5, -10.5), (-10, 10), (-10.5, 10.5), + (None, None), ], ids=[ "zero int", @@ -20,11 +22,12 @@ "positive float", "negative int", "negative float", + "None", ], ) -class TestShouldNegateValueOfCell: - def test_dunder_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: -cell, expected) +class TestShouldNegateValue: + def test_dunder_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: -cell, expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: cell.neg(), expected) + def test_named_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.neg(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_neq.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_neq.py new file mode 100644 index 000000000..efa0cfec4 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_neq.py @@ -0,0 +1,99 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (3, 3, False), + (3, 1.5, True), + (1.5, 3, True), + (1.5, 1.5, False), + (None, 3, None), + (3, None, None), + ], + ids=[ + "int - int", + "int - float", + "float - int", + "float - float", + "left is None", + "right is None", + ], +) +class TestShouldComputeNegatedEquality: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell != value2, expected) + + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float 
| None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: cell != _LazyCell(pl.lit(value2)), expected) + + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: value2 != cell, expected) # type: ignore[arg-type,return-value] + + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: _LazyCell(pl.lit(value2)) != cell, expected) # type: ignore[arg-type,return-value] + + def test_named_method(self, value1: float | None, value2: float | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell.neq(value2), expected) + + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: cell.neq(_LazyCell(pl.lit(value2))), expected) + + +@pytest.mark.parametrize( + ("value1", "value2", "expected"), + [ + (None, 3, True), + (3, None, True), + (None, None, False), + ], + ids=[ + "left is None", + "right is None", + "both are None", + ], +) +class TestShouldComputeNegatedEqualityWithoutPropagatingMissingValues: + def test_named_method( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works(value1, lambda cell: cell.neq(value2, propagate_missing_values=False), expected) + + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.neq(_LazyCell(pl.lit(value2)), propagate_missing_values=False), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_not.py 
b/tests/safeds/data/tabular/containers/_lazy_cell/test_not.py index 6381200b7..c0619094c 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_not.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_not.py @@ -2,6 +2,7 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -10,19 +11,21 @@ [ (False, True), (True, False), + (None, None), (0, True), (1, False), ], ids=[ - "false", - "true", + "False", + "True", + "None", "falsy int", "truthy int", ], ) class TestShouldInvertValueOfCell: - def test_dunder_method(self, value: Any, expected: bool) -> None: - assert_cell_operation_works(value, lambda cell: ~cell, expected) + def test_dunder_method(self, value: Any, expected: bool | None) -> None: + assert_cell_operation_works(value, lambda cell: ~cell, expected, type_if_none=ColumnType.boolean()) - def test_named_method(self, value: Any, expected: bool) -> None: - assert_cell_operation_works(value, lambda cell: cell.not_(), expected) + def test_named_method(self, value: Any, expected: bool | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.not_(), expected, type_if_none=ColumnType.boolean()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_or.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_or.py index 6220b4011..0d19834bc 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_or.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_or.py @@ -4,6 +4,7 @@ import pytest from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,39 +13,69 @@ [ (False, False, False), (False, True, True), + (False, None, None), (True, False, True), (True, True, True), + (True, None, True), + (None, False, None), + (None, True, True), + (None, None, None), (0, False, False), (0, True, True), (1, False, True), (1, True, 
True), ], ids=[ - "false - false", - "false - true", - "true - false", - "true - true", - "falsy int - false", - "falsy int - true", - "truthy int - false", - "truthy int - true", + "False - False", + "False - True", + "False - None", + "True - False", + "True - True", + "True - None", + "None - False", + "None - True", + "None - None", + "falsy int - False", + "falsy int - True", + "truthy int - False", + "truthy int - True", ], ) class TestShouldComputeDisjunction: - def test_dunder_method(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell | value2, expected) + def test_dunder_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell | value2, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell | _LazyCell(pl.lit(value2)), expected) + def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell | _LazyCell(pl.lit(value2)), + expected, + type_if_none=ColumnType.boolean(), + ) - def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: value1 | cell, expected) + def test_dunder_method_inverted_order(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value2, lambda cell: value1 | cell, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) | cell, expected) + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: Any, + value2: bool | None, + 
expected: bool | None, + ) -> None: + assert_cell_operation_works( + value2, + lambda cell: _LazyCell(pl.lit(value1)) | cell, + expected, + type_if_none=ColumnType.boolean(), + ) - def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.or_(value2), expected) + def test_named_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell.or_(value2), expected, type_if_none=ColumnType.boolean()) - def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.or_(_LazyCell(pl.lit(value2))), expected) + def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.or_(_LazyCell(pl.lit(value2))), + expected, + type_if_none=ColumnType.boolean(), + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_pos.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_pos.py index da37cc41f..ec8bebf85 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_pos.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_pos.py @@ -1,5 +1,6 @@ import pytest +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,6 +13,7 @@ (10.5, 10.5), (-10, -10), (-10.5, -10.5), + (None, None), ], ids=[ "zero int", @@ -20,8 +22,9 @@ "positive float", "negative int", "negative float", + "None", ], ) -class TestShouldReturnValueOfCell: - def test_dunder_method(self, value: float, expected: float) -> None: - assert_cell_operation_works(value, lambda cell: +cell, expected) +class TestShouldReturnValue: + def test_dunder_method(self, value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: +cell, expected, 
type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_pow.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_pow.py index a977b5e92..e0a833c88 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_pow.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_pow.py @@ -2,6 +2,7 @@ import pytest from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,29 +13,77 @@ (4, 0.5, 2.0), (1.5, 2, 2.25), (2.25, 0.5, 1.5), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputePower: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: - assert_cell_operation_works(value1, lambda cell: cell**value2, expected) + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: + if value2 is None: + pytest.skip("polars does not support null exponents.") - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: - assert_cell_operation_works(value1, lambda cell: cell ** _LazyCell(pl.lit(value2)), expected) + assert_cell_operation_works(value1, lambda cell: cell**value2, expected, type_if_none=ColumnType.float64()) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: - assert_cell_operation_works(value2, lambda cell: value1**cell, expected) + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell ** _LazyCell(pl.lit(value2, dtype=pl.Float64())), + expected, + type_if_none=ColumnType.float64(), + ) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, 
value2: float, expected: float) -> None: - assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) ** cell, expected) + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: + if value1 is None: + pytest.skip("polars does not support null base.") - def test_named_method(self, value1: float, value2: float, expected: float) -> None: - assert_cell_operation_works(value1, lambda cell: cell.pow(value2), expected) + assert_cell_operation_works(value2, lambda cell: value1**cell, expected, type_if_none=ColumnType.float64()) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: - assert_cell_operation_works(value1, lambda cell: cell.pow(_LazyCell(pl.lit(value2))), expected) + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: + assert_cell_operation_works( + value2, + lambda cell: _LazyCell(pl.lit(value1, dtype=pl.Float64())) ** cell, + expected, + type_if_none=ColumnType.float64(), + ) + + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: + if value2 is None: + pytest.skip("polars does not support null exponents.") + + assert_cell_operation_works(value1, lambda cell: cell.pow(value2), expected, type_if_none=ColumnType.float64()) + + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.pow(_LazyCell(pl.lit(value2, dtype=pl.Float64()))), + expected, + type_if_none=ColumnType.float64(), + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py new file mode 100644 index 000000000..91313f5ad --- /dev/null +++ 
b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py @@ -0,0 +1,27 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.constant(1), + "dyn int: 1", + ), + ( + _LazyCell(pl.col("a")), + 'col("a")', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(cell: Cell, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert expected in repr(cell) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_sizeof.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_sizeof.py index 3bd544fc5..18f0a2017 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_sizeof.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_sizeof.py @@ -1,14 +1,26 @@ import sys -from typing import TYPE_CHECKING, Any import polars as pl +import pytest +from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -if TYPE_CHECKING: - from safeds.data.tabular.containers import Cell - -def test_should_return_size_greater_than_normal_object() -> None: - cell: Cell[Any] = _LazyCell(pl.col("a")) +@pytest.mark.parametrize( + "cell", + [ + Cell.constant(1), + Cell.date(2025, 1, 15), + Cell.date(_LazyCell(pl.col("a")), 1, 15), + _LazyCell(pl.col("a")), + ], + ids=[ + "constant", + "date, int", + "date, column", + "column", + ], +) +def test_should_be_larger_than_normal_object(cell: Cell) -> None: assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_str.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_str.py new file mode 100644 index 000000000..5fd8d0d0c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_str.py @@ -0,0 +1,27 @@ +import polars as 
pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.constant(1), + "dyn int: 1", + ), + ( + _LazyCell(pl.col("a")), + 'col("a")', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(cell: Cell, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(cell) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_sub.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_sub.py index 15593ae79..b7a680f3d 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_sub.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_sub.py @@ -12,29 +12,53 @@ (3, 1.5, 1.5), (1.5, 3, -1.5), (1.5, 1.5, 0.0), + (None, 3, None), + (3, None, None), ], ids=[ "int - int", "int - float", "float - int", "float - float", + "left is None", + "right is None", ], ) class TestShouldComputeSubtraction: - def test_dunder_method(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell - value2, expected) - def test_dunder_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell - _LazyCell(pl.lit(value2)), expected) - def test_dunder_method_inverted_order(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: value1 - cell, 
expected) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) - cell, expected) - def test_named_method(self, value1: float, value2: float, expected: float) -> None: + def test_named_method(self, value1: float | None, value2: float | None, expected: float | None) -> None: assert_cell_operation_works(value1, lambda cell: cell.sub(value2), expected) - def test_named_method_wrapped_in_cell(self, value1: float, value2: float, expected: float) -> None: + def test_named_method_wrapped_in_cell( + self, + value1: float | None, + value2: float | None, + expected: float | None, + ) -> None: assert_cell_operation_works(value1, lambda cell: cell.sub(_LazyCell(pl.lit(value2))), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_time.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_time.py new file mode 100644 index 000000000..34e5c3367 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_time.py @@ -0,0 +1,60 @@ +from datetime import time + +import pytest + +from safeds._typing import _ConvertibleToIntCell +from safeds.data.tabular.containers import Cell +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("hour", "minute", "second", "microsecond", "expected"), + [ + (1, 2, 3, 4, time(1, 2, 3, 4)), + (Cell.constant(1), Cell.constant(2), Cell.constant(3), Cell.constant(4), time(1, 2, 3, 4)), + # invalid hour + (None, 2, 3, 4, None), + (-1, 2, 3, 4, None), + (24, 2, 3, 4, None), + # invalid minute + (1, None, 3, 4, None), + (1, -1, 3, 4, None), + (1, 60, 3, 4, None), + # invalid second + (1, 2, None, 4, None), + (1, 2, -1, 4, None), + (1, 2, 60, 4, None), + # invalid microsecond + (1, 2, 3, None, None), 
+ (1, 2, 3, -1, None), + (1, 2, 3, 1_000_000, None), + ], + ids=[ + "int components", + "cell components", + "hour is None", + "hour is too low", + "hour is too high", + "minute is None", + "minute is too low", + "minute is too high", + "second is None", + "second is too low", + "second is too high", + "microsecond is None", + "microsecond is too low", + "microsecond is too high", + ], +) +def test_should_return_time( + hour: _ConvertibleToIntCell, + minute: _ConvertibleToIntCell, + second: _ConvertibleToIntCell, + microsecond: _ConvertibleToIntCell, + expected: time, +) -> None: + assert_cell_operation_works( + None, + lambda _: Cell.time(hour, minute, second, microsecond=microsecond), + expected, + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_xor.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_xor.py index e62398512..995e6e0e7 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_xor.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_xor.py @@ -4,6 +4,7 @@ import pytest from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.typing import ColumnType from tests.helpers import assert_cell_operation_works @@ -12,39 +13,69 @@ [ (False, False, False), (False, True, True), + (False, None, None), (True, False, True), (True, True, False), + (True, None, None), + (None, False, None), + (None, True, None), + (None, None, None), (0, False, False), (0, True, True), (1, False, True), (1, True, False), ], ids=[ - "false - false", - "false - true", - "true - false", - "true - true", - "falsy int - false", - "falsy int - true", - "truthy int - false", - "truthy int - true", + "False - False", + "False - True", + "False - None", + "True - False", + "True - True", + "True - None", + "None - False", + "None - True", + "None - None", + "falsy int - False", + "falsy int - True", + "truthy int - False", + "truthy int - True", ], ) class TestShouldComputeExclusiveOr: - def test_dunder_method(self, 
value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell ^ value2, expected) + def test_dunder_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: cell ^ value2, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell ^ _LazyCell(pl.lit(value2)), expected) + def test_dunder_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell ^ _LazyCell(pl.lit(value2)), + expected, + type_if_none=ColumnType.boolean(), + ) - def test_dunder_method_inverted_order(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: value1 ^ cell, expected) + def test_dunder_method_inverted_order(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value2, lambda cell: value1 ^ cell, expected, type_if_none=ColumnType.boolean()) - def test_dunder_method_inverted_order_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value2, lambda cell: _LazyCell(pl.lit(value1)) ^ cell, expected) + def test_dunder_method_inverted_order_wrapped_in_cell( + self, + value1: Any, + value2: bool | None, + expected: bool | None, + ) -> None: + assert_cell_operation_works( + value2, + lambda cell: _LazyCell(pl.lit(value1)) ^ cell, + expected, + type_if_none=ColumnType.boolean(), + ) - def test_named_method(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.xor(value2), expected) + def test_named_method(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works(value1, lambda cell: 
cell.xor(value2), expected, type_if_none=ColumnType.boolean()) - def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool, expected: bool) -> None: - assert_cell_operation_works(value1, lambda cell: cell.xor(_LazyCell(pl.lit(value2))), expected) + def test_named_method_wrapped_in_cell(self, value1: Any, value2: bool | None, expected: bool | None) -> None: + assert_cell_operation_works( + value1, + lambda cell: cell.xor(_LazyCell(pl.lit(value2))), + expected, + type_if_none=ColumnType.boolean(), + ) diff --git a/tests/safeds/data/tabular/containers/_table/test_add_computed_column.py b/tests/safeds/data/tabular/containers/_table/test_add_computed_column.py index c0920d5de..69e8a2373 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_computed_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_computed_column.py @@ -12,19 +12,19 @@ ( lambda: Table({}), "col1", - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": []}), ), ( lambda: Table({"col1": []}), "col2", - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": [], "col2": []}), ), ( lambda: Table({"col1": [1, 2, 3]}), "col2", - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": [1, 2, 3], "col2": [None, None, None]}), ), ( diff --git a/tests/safeds/data/tabular/containers/_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/test_filter_rows.py index a53034e7b..61d474b02 100644 --- a/tests/safeds/data/tabular/containers/_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_filter_rows.py @@ -10,12 +10,12 @@ [ ( lambda: Table({}), - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({}), ), ( lambda: Table({"col1": []}), - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({"col1": []}), ), ( diff --git 
a/tests/safeds/data/tabular/containers/_table/test_filter_rows_by_column.py b/tests/safeds/data/tabular/containers/_table/test_filter_rows_by_column.py index 56a7f4c5e..58fe61989 100644 --- a/tests/safeds/data/tabular/containers/_table/test_filter_rows_by_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_filter_rows_by_column.py @@ -12,7 +12,7 @@ ( lambda: Table({"col1": [], "col2": []}), "col1", - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({"col1": [], "col2": []}), ), ( diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_rows.py b/tests/safeds/data/tabular/containers/_table/test_remove_rows.py index 3bd524981..8979bb22f 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_rows.py @@ -10,12 +10,12 @@ [ ( lambda: Table({}), - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({}), ), ( lambda: Table({"col1": []}), - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({"col1": []}), ), ( diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_rows_by_column.py b/tests/safeds/data/tabular/containers/_table/test_remove_rows_by_column.py index 6b3061bb9..c757cc7de 100644 --- a/tests/safeds/data/tabular/containers/_table/test_remove_rows_by_column.py +++ b/tests/safeds/data/tabular/containers/_table/test_remove_rows_by_column.py @@ -12,7 +12,7 @@ ( lambda: Table({"col1": [], "col2": []}), "col1", - lambda _: Cell.from_literal(False), # noqa: FBT003 + lambda _: Cell.constant(False), # noqa: FBT003 Table({"col1": [], "col2": []}), ), ( diff --git a/tests/safeds/data/tabular/containers/_table/test_transform_columns.py b/tests/safeds/data/tabular/containers/_table/test_transform_columns.py index ecd079750..3a30da46a 100644 --- 
a/tests/safeds/data/tabular/containers/_table/test_transform_columns.py +++ b/tests/safeds/data/tabular/containers/_table/test_transform_columns.py @@ -13,7 +13,7 @@ ( lambda: Table({"col1": []}), "col1", - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": []}), ), # no rows (computed value) @@ -27,7 +27,7 @@ ( lambda: Table({"col1": [1, 2]}), "col1", - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": [None, None]}), ), # non-empty (computed value) @@ -41,7 +41,7 @@ ( lambda: Table({"col1": [1, 2], "col2": [3, 4]}), ["col1", "col2"], - lambda _: Cell.from_literal(None), + lambda _: Cell.constant(None), Table({"col1": [None, None], "col2": [None, None]}), ), # multiple columns transformed (computed value)