From dc249d3eda4a07a7b45e55a5f7cfc7fe7cb2cc77 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 18:47:37 +0100 Subject: [PATCH 01/57] feat: move `StringCell` and `TemporalCell` classes into `query` package They are not really containers, just namespaces for computations. --- src/safeds/_typing/__init__.py | 8 +-- .../data/tabular/containers/__init__.py | 6 --- src/safeds/data/tabular/containers/_cell.py | 52 +++++++++---------- src/safeds/data/tabular/containers/_column.py | 25 +++++---- .../data/tabular/containers/_lazy_cell.py | 40 +++++++------- src/safeds/data/tabular/containers/_table.py | 15 +++--- src/safeds/data/tabular/query/__init__.py | 22 ++++++++ .../_lazy_string_cell.py | 31 ++++++----- .../_lazy_temporal_cell.py | 25 +++++---- .../{containers => query}/_string_cell.py | 24 +++++---- .../{containers => query}/_temporal_cell.py | 19 ++++--- .../_lazy_string_cell => query}/__init__.py | 0 .../_lazy_string_cell}/__init__.py | 0 .../_lazy_string_cell/test_contains.py | 0 .../_lazy_string_cell/test_ends_with.py | 0 .../_lazy_string_cell/test_equals.py | 5 +- .../_lazy_string_cell/test_hash.py | 4 +- .../_lazy_string_cell/test_index_of.py | 0 .../_lazy_string_cell/test_length.py | 0 .../_lazy_string_cell/test_replace.py | 0 .../_lazy_string_cell/test_sizeof.py | 2 +- .../_lazy_string_cell/test_starts_with.py | 0 .../_lazy_string_cell/test_substring.py | 0 .../_lazy_string_cell/test_to_date.py | 0 .../_lazy_string_cell/test_to_datetime.py | 0 .../_lazy_string_cell/test_to_float.py | 0 .../_lazy_string_cell/test_to_int.py | 0 .../_lazy_string_cell/test_to_lowercase.py | 0 .../_lazy_string_cell/test_to_uppercase.py | 0 .../_lazy_string_cell/test_trim.py | 0 .../_lazy_string_cell/test_trim_end.py | 0 .../_lazy_string_cell/test_trim_start.py | 0 .../query/_lazy_temporal_cell/__init__.py | 0 .../_lazy_temporal_cell/test_century.py | 0 .../test_date_to_string.py | 0 .../test_datetime_to_string.py | 0 .../_lazy_temporal_cell/test_day.py | 0 
.../_lazy_temporal_cell/test_equals.py | 5 +- .../_lazy_temporal_cell/test_hash.py | 4 +- .../_lazy_temporal_cell/test_month.py | 0 .../_lazy_temporal_cell/test_sizeof.py | 2 +- .../_lazy_temporal_cell/test_week.py | 0 .../_lazy_temporal_cell/test_weekday.py | 0 .../_lazy_temporal_cell/test_year.py | 0 44 files changed, 153 insertions(+), 136 deletions(-) create mode 100644 src/safeds/data/tabular/query/__init__.py rename src/safeds/data/tabular/{containers => query}/_lazy_string_cell.py (81%) rename src/safeds/data/tabular/{containers => query}/_lazy_temporal_cell.py (89%) rename src/safeds/data/tabular/{containers => query}/_string_cell.py (96%) rename src/safeds/data/tabular/{containers => query}/_temporal_cell.py (95%) rename tests/safeds/data/tabular/{containers/_lazy_string_cell => query}/__init__.py (100%) rename tests/safeds/data/tabular/{containers/_lazy_temporal_cell => query/_lazy_string_cell}/__init__.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_contains.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_ends_with.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_equals.py (85%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_hash.py (82%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_index_of.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_length.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_replace.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_sizeof.py (72%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_starts_with.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_substring.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_date.py (100%) 
rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_datetime.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_float.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_int.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_lowercase.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_to_uppercase.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_trim.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_trim_end.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_string_cell/test_trim_start.py (100%) create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_cell/__init__.py rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_century.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_date_to_string.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_datetime_to_string.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_day.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_equals.py (85%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_hash.py (81%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_month.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_sizeof.py (71%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_week.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_weekday.py (100%) rename tests/safeds/data/tabular/{containers => query}/_lazy_temporal_cell/test_year.py (100%) diff --git a/src/safeds/_typing/__init__.py 
b/src/safeds/_typing/__init__.py index b418fe5f9..3e9087645 100644 --- a/src/safeds/_typing/__init__.py +++ b/src/safeds/_typing/__init__.py @@ -6,21 +6,23 @@ from safeds.data.tabular.containers import Cell +# Literals _NumericLiteral: TypeAlias = int | float | Decimal _TemporalLiteral: TypeAlias = datetime.date | datetime.time | datetime.datetime | datetime.timedelta _PythonLiteral: TypeAlias = _NumericLiteral | bool | str | bytes | _TemporalLiteral + +# Convertible to cell (we cannot restrict `Cell`, because `Row.get_cell` returns a `Cell[Any]`) _ConvertibleToCell: TypeAlias = _PythonLiteral | Cell | None -_BooleanCell: TypeAlias = Cell[bool | None] -# We cannot restrict `Cell`, because `Row.get_cell` returns a `Cell[Any]`. _ConvertibleToBooleanCell: TypeAlias = bool | Cell | None _ConvertibleToIntCell: TypeAlias = int | Cell | None +_ConvertibleToStringCell: TypeAlias = str | Cell | None __all__ = [ - "_BooleanCell", "_ConvertibleToBooleanCell", "_ConvertibleToCell", "_ConvertibleToIntCell", + "_ConvertibleToStringCell", "_NumericLiteral", "_PythonLiteral", "_TemporalLiteral", diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index 5c0499ed0..5512b0b1e 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -8,9 +8,7 @@ from ._cell import Cell from ._column import Column from ._row import Row - from ._string_cell import StringCell from ._table import Table - from ._temporal_cell import TemporalCell apipkg.initpkg( __name__, @@ -18,8 +16,6 @@ "Cell": "._cell:Cell", "Column": "._column:Column", "Row": "._row:Row", - "StringCell": "._string_cell:StringCell", - "TemporalCell": "._temporal_cell:TemporalCell", "Table": "._table:Table", }, ) @@ -28,7 +24,5 @@ "Cell", "Column", "Row", - "StringCell", "Table", - "TemporalCell", ] diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 
0cc022b45..0988af8b3 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -9,17 +9,15 @@ import polars as pl from safeds._typing import ( - _BooleanCell, _ConvertibleToBooleanCell, _ConvertibleToCell, _ConvertibleToIntCell, _PythonLiteral, ) + from safeds.data.tabular.query._string_cell import StringCell + from safeds.data.tabular.query._temporal_cell import TemporalCell from safeds.data.tabular.typing import ColumnType - from ._string_cell import StringCell - from ._temporal_cell import TemporalCell - T_co = TypeVar("T_co", covariant=True) P = TypeVar("P") @@ -400,46 +398,46 @@ def first_not_none(cells: list[Cell[P]]) -> Cell[P | None]: # "Boolean" operators (actually bitwise) ----------------------------------- @abstractmethod - def __invert__(self) -> _BooleanCell: ... + def __invert__(self) -> Cell[bool | None]: ... @abstractmethod - def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __and__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __rand__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __or__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __ror__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __ror__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __xor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __rxor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... 
# Comparison --------------------------------------------------------------- @abstractmethod - def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __eq__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] ... @abstractmethod - def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __ge__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __gt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __le__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __lt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __ne__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] ... # Numeric operators -------------------------------------------------------- @@ -566,7 +564,7 @@ def dt(self) -> TemporalCell: # Boolean operations # ------------------------------------------------------------------------------------------------------------------ - def not_(self) -> _BooleanCell: + def not_(self) -> Cell[bool | None]: """ Negate a boolean. This is equivalent to the `~` operator. @@ -601,7 +599,7 @@ def not_(self) -> _BooleanCell: """ return self.__invert__() - def and_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def and_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ Perform a boolean AND operation. This is equivalent to the `&` operator. 
@@ -636,7 +634,7 @@ def and_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ return self.__and__(other) - def or_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def or_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ Perform a boolean OR operation. This is equivalent to the `|` operator. @@ -670,7 +668,7 @@ def or_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ return self.__or__(other) - def xor(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def xor(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ Perform a boolean XOR operation. This is equivalent to the `^` operator. @@ -1006,7 +1004,7 @@ def eq( other: _ConvertibleToCell, *, propagate_missing_values: bool = True, - ) -> _BooleanCell: + ) -> Cell[bool | None]: """ Check if equal to a value. The default behavior is equivalent to the `==` operator. @@ -1062,7 +1060,7 @@ def neq( other: _ConvertibleToCell, *, propagate_missing_values: bool = True, - ) -> _BooleanCell: + ) -> Cell[bool | None]: """ Check if not equal to a value. The default behavior is equivalent to the `!=` operator. @@ -1119,7 +1117,7 @@ def neq( +-------+ """ - def ge(self, other: _ConvertibleToCell) -> _BooleanCell: + def ge(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than or equal to a value. This is equivalent to the `>=` operator. @@ -1151,7 +1149,7 @@ def ge(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__ge__(other) - def gt(self, other: _ConvertibleToCell) -> _BooleanCell: + def gt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than a value. This is equivalent to the `>` operator. @@ -1183,7 +1181,7 @@ def gt(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__gt__(other) - def le(self, other: _ConvertibleToCell) -> _BooleanCell: + def le(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than or equal to a value. 
This is equivalent to the `<=` operator. @@ -1215,7 +1213,7 @@ def le(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__le__(other) - def lt(self, other: _ConvertibleToCell) -> _BooleanCell: + def lt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than a value. This is equivalent to the `<` operator. diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 6e4f86fc1..2817a008a 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -18,7 +18,6 @@ if TYPE_CHECKING: from polars import Series - from safeds._typing import _BooleanCell from safeds.data.tabular.typing import ColumnType from safeds.exceptions import ( # noqa: F401 ColumnTypeError, @@ -325,7 +324,7 @@ def get_value(self, index: int) -> T_co: @overload def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -333,14 +332,14 @@ def all( @overload def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: @@ -401,7 +400,7 @@ def all( @overload def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -409,14 +408,14 @@ def any( @overload def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... 
def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: @@ -477,7 +476,7 @@ def any( @overload def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -485,14 +484,14 @@ def count_if( @overload def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> int | None: ... def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> int | None: @@ -547,7 +546,7 @@ def count_if( @overload def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -555,14 +554,14 @@ def none( @overload def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... 
def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index e4f850537..8d711bfcc 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -9,12 +9,10 @@ if TYPE_CHECKING: import polars as pl - from safeds._typing import _BooleanCell, _ConvertibleToBooleanCell, _ConvertibleToCell + from safeds._typing import _ConvertibleToBooleanCell, _ConvertibleToCell + from safeds.data.tabular.query import StringCell, TemporalCell from safeds.data.tabular.typing import ColumnType - from ._string_cell import StringCell - from ._temporal_cell import TemporalCell - T = TypeVar("T") @@ -34,58 +32,58 @@ def __init__(self, expression: pl.Expr) -> None: # "Boolean" operators (actually bitwise) ----------------------------------- - def __invert__(self) -> _BooleanCell: + def __invert__(self) -> Cell[bool | None]: import polars as pl return _wrap(self._expression.cast(pl.Boolean).__invert__()) - def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __and__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__and__(other)) - def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __rand__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__rand__(other)) - def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __or__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__or__(other)) - def __ror__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __ror__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) 
return _wrap(self._expression.__ror__(other)) - def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __xor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__xor__(other)) - def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def __rxor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__rxor__(other)) # Comparison --------------------------------------------------------------- - def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __eq__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] other = _unwrap(other) return _wrap(self._expression.__eq__(other)) - def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: + def __ge__(self, other: _ConvertibleToCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__ge__(other)) - def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: + def __gt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__gt__(other)) - def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: + def __le__(self, other: _ConvertibleToCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__le__(other)) - def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: + def __lt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: other = _unwrap(other) return _wrap(self._expression.__lt__(other)) - def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __ne__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] other = _unwrap(other) return _wrap(self._expression.__ne__(other)) @@ -182,20 +180,20 @@ def __str__(self) -> str: @property def str(self) -> StringCell: - from ._lazy_string_cell import _LazyStringCell # circular 
import + from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell # circular import return _LazyStringCell(self._expression) @property def dt(self) -> TemporalCell: - from ._lazy_temporal_cell import _LazyTemporalCell # circular import + from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell # circular import return _LazyTemporalCell(self._expression) # ------------------------------------------------------------------------------------------------------------------ # Comparison operations # ------------------------------------------------------------------------------------------------------------------ - def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: + def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: other = _unwrap(other) if propagate_missing_values: @@ -203,7 +201,7 @@ def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True else: return _wrap(self._expression.eq_missing(other)) - def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: + def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: other = _unwrap(other) if propagate_missing_values: diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index e3c23f0f9..f66cede68 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -40,7 +40,6 @@ from torch import Tensor from torch.utils.data import DataLoader, Dataset - from safeds._typing import _BooleanCell from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.transformation import ( InvertibleTableTransformer, @@ -1280,7 +1279,7 @@ def transform_columns( @overload def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | 
None]], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -1288,14 +1287,14 @@ def count_rows_if( @overload def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], *, ignore_unknown: bool, ) -> int | None: ... def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> int | None: @@ -1346,7 +1345,7 @@ def count_rows_if( def filter_rows( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], ) -> Table: """ Keep only rows that satisfy a condition and return the result as a new table. @@ -1393,7 +1392,7 @@ def filter_rows( def filter_rows_by_column( self, name: str, - predicate: Callable[[Cell], _BooleanCell], + predicate: Callable[[Cell], Cell[bool | None]], ) -> Table: """ Keep only rows that satisfy a condition on a specific column and return the result as a new table. @@ -1488,7 +1487,7 @@ def remove_duplicate_rows(self) -> Table: def remove_rows( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], ) -> Table: """ Remove rows that satisfy a condition and return the result as a new table. @@ -1540,7 +1539,7 @@ def remove_rows( def remove_rows_by_column( self, name: str, - predicate: Callable[[Cell], _BooleanCell], + predicate: Callable[[Cell], Cell[bool | None]], ) -> Table: """ Remove rows that satisfy a condition on a specific column and return the result as a new table. 
diff --git a/src/safeds/data/tabular/query/__init__.py b/src/safeds/data/tabular/query/__init__.py new file mode 100644 index 000000000..0914e01b0 --- /dev/null +++ b/src/safeds/data/tabular/query/__init__.py @@ -0,0 +1,22 @@ +"""Classes that represent queries on the data.""" + +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._string_cell import StringCell + from ._temporal_cell import TemporalCell + +apipkg.initpkg( + __name__, + { + "StringCell": "._string_cell:StringCell", + "TemporalCell": "._temporal_cell:TemporalCell", + }, +) + +__all__ = [ + "StringCell", + "TemporalCell", +] diff --git a/src/safeds/data/tabular/containers/_lazy_string_cell.py b/src/safeds/data/tabular/query/_lazy_string_cell.py similarity index 81% rename from src/safeds/data/tabular/containers/_lazy_string_cell.py rename to src/safeds/data/tabular/query/_lazy_string_cell.py index 20f80e0fc..57e770087 100644 --- a/src/safeds/data/tabular/containers/_lazy_string_cell.py +++ b/src/safeds/data/tabular/query/_lazy_string_cell.py @@ -4,16 +4,15 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound - -from ._lazy_cell import _LazyCell -from ._string_cell import StringCell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query._string_cell import StringCell if TYPE_CHECKING: import datetime import polars as pl - from ._cell import Cell + from safeds.data.tabular.containers._cell import Cell class _LazyStringCell(StringCell): @@ -34,28 +33,28 @@ def __sizeof__(self) -> int: # String operations # ------------------------------------------------------------------------------------------------------------------ - def contains(self, substring: str) -> Cell[bool]: + def contains(self, substring: str) -> Cell[bool | None]: return _LazyCell(self._expression.str.contains(substring, literal=True)) - def length(self, optimize_for_ascii: bool = False) -> Cell[int]: + def length(self, 
optimize_for_ascii: bool = False) -> Cell[int | None]: if optimize_for_ascii: return _LazyCell(self._expression.str.len_bytes()) else: return _LazyCell(self._expression.str.len_chars()) - def ends_with(self, suffix: str) -> Cell[bool]: + def ends_with(self, suffix: str) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) def index_of(self, substring: str) -> Cell[int | None]: return _LazyCell(self._expression.str.find(substring, literal=True)) - def replace(self, old: str, new: str) -> Cell[str]: + def replace(self, old: str, new: str) -> Cell[str | None]: return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - def starts_with(self, prefix: str) -> Cell[bool]: + def starts_with(self, prefix: str) -> Cell[bool | None]: return _LazyCell(self._expression.str.starts_with(prefix)) - def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: + def substring(self, start: int = 0, length: int | None = None) -> Cell[str | None]: _check_bounds("length", length, lower_bound=_ClosedBound(0)) return _LazyCell(self._expression.str.slice(start, length)) @@ -74,19 +73,19 @@ def to_float(self) -> Cell[float | None]: return _LazyCell(self._expression.cast(pl.Float64, strict=False)) - def to_lowercase(self) -> Cell[str]: + def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) - def to_uppercase(self) -> Cell[str]: + def to_uppercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_uppercase()) - def trim(self) -> Cell[str]: + def trim(self) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_chars()) - def trim_end(self) -> Cell[str]: + def trim_end(self) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_chars_end()) - def trim_start(self) -> Cell[str]: + def trim_start(self) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_chars_start()) # 
------------------------------------------------------------------------------------------------------------------ @@ -98,4 +97,4 @@ def _equals(self, other: object) -> bool: return NotImplemented if self is other: return True - return self._expression.meta.eq(other._expression.meta) + return self._expression.meta.eq(other._expression) diff --git a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py b/src/safeds/data/tabular/query/_lazy_temporal_cell.py similarity index 89% rename from src/safeds/data/tabular/containers/_lazy_temporal_cell.py rename to src/safeds/data/tabular/query/_lazy_temporal_cell.py index 180ecb58c..2a051a329 100644 --- a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py +++ b/src/safeds/data/tabular/query/_lazy_temporal_cell.py @@ -3,14 +3,13 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash - -from ._lazy_cell import _LazyCell -from ._temporal_cell import TemporalCell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query._temporal_cell import TemporalCell if TYPE_CHECKING: import polars as pl - from ._cell import Cell + from safeds.data.tabular.containers._cell import Cell class _LazyTemporalCell(TemporalCell): @@ -31,30 +30,30 @@ def __sizeof__(self) -> int: # Temporal operations # ------------------------------------------------------------------------------------------------------------------ - def century(self) -> Cell[int]: + def century(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.century()) - def weekday(self) -> Cell[int]: + def weekday(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.weekday()) - def week(self) -> Cell[int]: + def week(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.week()) - def year(self) -> Cell[int]: + def year(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.year()) - def month(self) -> Cell[int]: + def month(self) -> Cell[int | None]: return 
_LazyCell(self._expression.dt.month()) - def day(self) -> Cell[int]: + def day(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.day()) - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: + def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str | None]: if not _check_format_string(format_string): raise ValueError("Invalid format string") return _LazyCell(self._expression.dt.to_string(format=format_string)) - def date_to_string(self, format_string: str = "%F") -> Cell[str]: + def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: if not _check_format_string(format_string): # Fehler in _check_format_string raise ValueError("Invalid format string") @@ -69,7 +68,7 @@ def _equals(self, other: object) -> bool: return NotImplemented if self is other: return True - return self._expression.meta.eq(other._expression.meta) + return self._expression.meta.eq(other._expression) def _check_format_string(format_string: str) -> bool: diff --git a/src/safeds/data/tabular/containers/_string_cell.py b/src/safeds/data/tabular/query/_string_cell.py similarity index 96% rename from src/safeds/data/tabular/containers/_string_cell.py rename to src/safeds/data/tabular/query/_string_cell.py index e47a41444..747fc397d 100644 --- a/src/safeds/data/tabular/containers/_string_cell.py +++ b/src/safeds/data/tabular/query/_string_cell.py @@ -8,6 +8,8 @@ from safeds.data.tabular.containers import Cell +# TODO: examples with None + class StringCell(ABC): """ @@ -32,7 +34,7 @@ class StringCell(ABC): """ @abstractmethod - def contains(self, substring: str) -> Cell[bool]: + def contains(self, substring: str) -> Cell[bool | None]: """ Check if the string value in the cell contains the substring. 
@@ -55,7 +57,7 @@ def contains(self, substring: str) -> Cell[bool]: """ @abstractmethod - def ends_with(self, suffix: str) -> Cell[bool]: + def ends_with(self, suffix: str) -> Cell[bool | None]: """ Check if the string value in the cell ends with the suffix. @@ -109,7 +111,7 @@ def index_of(self, substring: str) -> Cell[int | None]: """ @abstractmethod - def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: + def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: """ Get the number of characters of the string value in the cell. @@ -141,7 +143,7 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: """ @abstractmethod - def replace(self, old: str, new: str) -> Cell[str]: + def replace(self, old: str, new: str) -> Cell[str | None]: """ Replace occurrences of the old substring with the new substring in the string value in the cell. @@ -174,7 +176,7 @@ def replace(self, old: str, new: str) -> Cell[str]: """ @abstractmethod - def starts_with(self, prefix: str) -> Cell[bool]: + def starts_with(self, prefix: str) -> Cell[bool | None]: """ Check if the string value in the cell starts with the prefix. @@ -197,7 +199,7 @@ def starts_with(self, prefix: str) -> Cell[bool]: """ @abstractmethod - def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: + def substring(self, start: int = 0, length: int | None = None) -> Cell[str | None]: """ Get a substring of the string value in the cell. @@ -360,7 +362,7 @@ def to_int(self, *, base: int = 10) -> Cell[int | None]: """ @abstractmethod - def to_lowercase(self) -> Cell[str]: + def to_lowercase(self) -> Cell[str | None]: """ Convert the string value in the cell to lowercase. @@ -386,7 +388,7 @@ def to_lowercase(self) -> Cell[str]: """ @abstractmethod - def to_uppercase(self) -> Cell[str]: + def to_uppercase(self) -> Cell[str | None]: """ Convert the string value in the cell to uppercase. 
@@ -412,7 +414,7 @@ def to_uppercase(self) -> Cell[str]: """ @abstractmethod - def trim(self) -> Cell[str]: + def trim(self) -> Cell[str | None]: """ Remove whitespace from the start and end of the string value in the cell. @@ -439,7 +441,7 @@ def trim(self) -> Cell[str]: """ @abstractmethod - def trim_end(self) -> Cell[str]: + def trim_end(self) -> Cell[str | None]: """ Remove whitespace from the end of the string value in the cell. @@ -466,7 +468,7 @@ def trim_end(self) -> Cell[str]: """ @abstractmethod - def trim_start(self) -> Cell[str]: + def trim_start(self) -> Cell[str | None]: """ Remove whitespace from the start of the string value in the cell. diff --git a/src/safeds/data/tabular/containers/_temporal_cell.py b/src/safeds/data/tabular/query/_temporal_cell.py similarity index 95% rename from src/safeds/data/tabular/containers/_temporal_cell.py rename to src/safeds/data/tabular/query/_temporal_cell.py index e4a3dca59..b9947e8c8 100644 --- a/src/safeds/data/tabular/containers/_temporal_cell.py +++ b/src/safeds/data/tabular/query/_temporal_cell.py @@ -7,6 +7,9 @@ from safeds.data.tabular.containers import Cell +# TODO: Examples with None + + class TemporalCell(ABC): """ Namespace for operations on temporal data. @@ -29,7 +32,7 @@ class TemporalCell(ABC): """ @abstractmethod - def century(self) -> Cell[int]: + def century(self) -> Cell[int | None]: """ Get the century of the underlying date(time) data. @@ -53,7 +56,7 @@ def century(self) -> Cell[int]: """ @abstractmethod - def weekday(self) -> Cell[int]: + def weekday(self) -> Cell[int | None]: """ Get the weekday of the underlying date(time) data. @@ -77,7 +80,7 @@ def weekday(self) -> Cell[int]: """ @abstractmethod - def week(self) -> Cell[int]: + def week(self) -> Cell[int | None]: """ Get the week of the underlying date(time) data. 
@@ -101,7 +104,7 @@ def week(self) -> Cell[int]: """ @abstractmethod - def year(self) -> Cell[int]: + def year(self) -> Cell[int | None]: """ Get the year of the underlying date(time) data. @@ -125,7 +128,7 @@ def year(self) -> Cell[int]: """ @abstractmethod - def month(self) -> Cell[int]: + def month(self) -> Cell[int | None]: """ Get the month of the underlying date(time) data. @@ -149,7 +152,7 @@ def month(self) -> Cell[int]: """ @abstractmethod - def day(self) -> Cell[int]: + def day(self) -> Cell[int | None]: """ Get the day of the underlying date(time) data. @@ -173,7 +176,7 @@ def day(self) -> Cell[int]: """ @abstractmethod - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: + def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str | None]: """ Convert the date value in the cell to a string. @@ -208,7 +211,7 @@ def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[s """ @abstractmethod - def date_to_string(self, format_string: str = "%F") -> Cell[str]: + def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: """ Convert the date value in the cell to a string. 
diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/__init__.py b/tests/safeds/data/tabular/query/__init__.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/__init__.py rename to tests/safeds/data/tabular/query/__init__.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_string_cell/__init__.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/__init__.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/__init__.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_contains.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_contains.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_contains.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_ends_with.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_ends_with.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_ends_with.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py similarity index 85% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py index 11bff4b38..18d3d0d63 100644 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py +++ b/tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py @@ -3,8 +3,9 @@ import polars as pl import pytest -from safeds.data.tabular.containers import StringCell, Table -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell +from 
safeds.data.tabular.containers import Table +from safeds.data.tabular.query import StringCell +from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py similarity index 82% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py index f5962be36..df82c710c 100644 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py @@ -1,8 +1,8 @@ import polars as pl import pytest -from safeds.data.tabular.containers import StringCell -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell +from safeds.data.tabular.query import StringCell +from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell def test_should_be_deterministic() -> None: diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_index_of.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_index_of.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_index_of.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_length.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_length.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_length.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_replace.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_replace.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_replace.py rename to 
tests/safeds/data/tabular/query/_lazy_string_cell/test_replace.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py similarity index 72% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py index d01d9b0f7..9d3822d6e 100644 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py @@ -2,7 +2,7 @@ import polars as pl -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell +from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell def test_should_return_size_greater_than_normal_object() -> None: diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_starts_with.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_starts_with.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_starts_with.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_substring.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_substring.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_substring.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_date.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_date.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_date.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_date.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_datetime.py 
b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_datetime.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_datetime.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_datetime.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_float.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_float.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_float.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_float.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_int.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_int.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_int.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_lowercase.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_lowercase.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_lowercase.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_uppercase.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_uppercase.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_to_uppercase.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_trim.py diff --git 
a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_end.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_end.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_end.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_end.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_start.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_start.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_start.py rename to tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_start.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_century.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_century.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_century.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_century.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_date_to_string.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_date_to_string.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_date_to_string.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_date_to_string.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_datetime_to_string.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_datetime_to_string.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_datetime_to_string.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_datetime_to_string.py diff --git 
a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_day.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_day.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_day.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_day.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py similarity index 85% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py index 1dfc9e181..257e3368f 100644 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py @@ -3,8 +3,9 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Table, TemporalCell -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell +from safeds.data.tabular.containers import Table +from safeds.data.tabular.query import TemporalCell +from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py similarity index 81% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py index c040d04b6..dd9a997f9 100644 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py @@ -1,8 +1,8 @@ import polars as pl import pytest -from safeds.data.tabular.containers import TemporalCell -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell +from safeds.data.tabular.query import TemporalCell +from 
safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell def test_should_be_deterministic() -> None: diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_month.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_month.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_month.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_month.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py similarity index 71% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py index 89ff99c06..1bf058b31 100644 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py @@ -2,7 +2,7 @@ import polars as pl -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell +from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell def test_should_return_size_greater_than_normal_object() -> None: diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_week.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_week.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_week.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_week.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_weekday.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_weekday.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_weekday.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_weekday.py diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_year.py 
b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_year.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_year.py rename to tests/safeds/data/tabular/query/_lazy_temporal_cell/test_year.py From 35cea66ba35c8bf65eddc6830d7690fbd3882e35 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 18:57:14 +0100 Subject: [PATCH 02/57] feat: rename `StringCell` to `StringOperations` and `TemporalCell` to `TemporalOperations` They are not subclasses of `Cell`, which might be implied by the name. --- src/safeds/data/tabular/containers/_cell.py | 7 ++- .../data/tabular/containers/_lazy_cell.py | 14 +++--- src/safeds/data/tabular/query/__init__.py | 12 ++--- ...ing_cell.py => _lazy_string_operations.py} | 7 +-- ...l_cell.py => _lazy_temporal_operations.py} | 7 +-- ...{_string_cell.py => _string_operations.py} | 2 +- ...mporal_cell.py => _temporal_operations.py} | 2 +- .../query/_lazy_string_cell/test_equals.py | 43 ----------------- .../query/_lazy_string_cell/test_hash.py | 25 ---------- .../__init__.py | 0 .../test_contains.py | 0 .../test_ends_with.py | 0 .../_lazy_string_operations/test_equals.py | 47 +++++++++++++++++++ .../_lazy_string_operations/test_hash.py | 25 ++++++++++ .../test_index_of.py | 0 .../test_length.py | 0 .../test_replace.py | 0 .../test_sizeof.py | 4 +- .../test_starts_with.py | 0 .../test_substring.py | 0 .../test_to_date.py | 0 .../test_to_datetime.py | 0 .../test_to_float.py | 0 .../test_to_int.py | 0 .../test_to_lowercase.py | 0 .../test_to_uppercase.py | 0 .../test_trim.py | 0 .../test_trim_end.py | 0 .../test_trim_start.py | 0 .../query/_lazy_temporal_cell/test_equals.py | 43 ----------------- .../query/_lazy_temporal_cell/test_hash.py | 25 ---------- .../__init__.py | 0 .../test_century.py | 0 .../test_date_to_string.py | 0 .../test_datetime_to_string.py | 0 .../test_day.py | 0 .../_lazy_temporal_operations/test_equals.py | 47 +++++++++++++++++++ 
.../_lazy_temporal_operations/test_hash.py | 25 ++++++++++ .../test_month.py | 0 .../test_sizeof.py | 4 +- .../test_week.py | 0 .../test_weekday.py | 0 .../test_year.py | 0 43 files changed, 174 insertions(+), 165 deletions(-) rename src/safeds/data/tabular/query/{_lazy_string_cell.py => _lazy_string_operations.py} (96%) rename src/safeds/data/tabular/query/{_lazy_temporal_cell.py => _lazy_temporal_operations.py} (96%) rename src/safeds/data/tabular/query/{_string_cell.py => _string_operations.py} (99%) rename src/safeds/data/tabular/query/{_temporal_cell.py => _temporal_operations.py} (99%) delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/__init__.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_contains.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_ends_with.py (100%) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_index_of.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_length.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_replace.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_sizeof.py (54%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_starts_with.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_substring.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => 
_lazy_string_operations}/test_to_date.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_to_datetime.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_to_float.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_to_int.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_to_lowercase.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_to_uppercase.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_trim.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_trim_end.py (100%) rename tests/safeds/data/tabular/query/{_lazy_string_cell => _lazy_string_operations}/test_trim_start.py (100%) delete mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/__init__.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_century.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_date_to_string.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_datetime_to_string.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_day.py (100%) create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_month.py (100%) rename 
tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_sizeof.py (53%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_week.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_weekday.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_cell => _lazy_temporal_operations}/test_year.py (100%) diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 0988af8b3..8d6e8e964 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -14,8 +14,7 @@ _ConvertibleToIntCell, _PythonLiteral, ) - from safeds.data.tabular.query._string_cell import StringCell - from safeds.data.tabular.query._temporal_cell import TemporalCell + from safeds.data.tabular.query import StringOperations, TemporalOperations from safeds.data.tabular.typing import ColumnType T_co = TypeVar("T_co", covariant=True) @@ -519,7 +518,7 @@ def __str__(self) -> str: ... @property @abstractmethod - def str(self) -> StringCell: + def str(self) -> StringOperations: """ Namespace for operations on strings. @@ -540,7 +539,7 @@ def str(self) -> StringCell: @property @abstractmethod - def dt(self) -> TemporalCell: + def dt(self) -> TemporalOperations: """ Namespace for operations on temporal values. 
diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index 8d711bfcc..0d770a117 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -10,7 +10,7 @@ import polars as pl from safeds._typing import _ConvertibleToBooleanCell, _ConvertibleToCell - from safeds.data.tabular.query import StringCell, TemporalCell + from safeds.data.tabular.query import StringOperations, TemporalOperations from safeds.data.tabular.typing import ColumnType T = TypeVar("T") @@ -179,16 +179,16 @@ def __str__(self) -> str: # ------------------------------------------------------------------------------------------------------------------ @property - def str(self) -> StringCell: - from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell # circular import + def str(self) -> StringOperations: + from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations # circular import - return _LazyStringCell(self._expression) + return _LazyStringOperations(self._expression) @property - def dt(self) -> TemporalCell: - from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell # circular import + def dt(self) -> TemporalOperations: + from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations # circular import - return _LazyTemporalCell(self._expression) + return _LazyTemporalOperations(self._expression) # ------------------------------------------------------------------------------------------------------------------ # Comparison operations diff --git a/src/safeds/data/tabular/query/__init__.py b/src/safeds/data/tabular/query/__init__.py index 0914e01b0..52200686f 100644 --- a/src/safeds/data/tabular/query/__init__.py +++ b/src/safeds/data/tabular/query/__init__.py @@ -5,18 +5,18 @@ import apipkg if TYPE_CHECKING: - from ._string_cell import StringCell - from ._temporal_cell import TemporalCell + from 
._string_operations import StringOperations + from ._temporal_operations import TemporalOperations apipkg.initpkg( __name__, { - "StringCell": "._string_cell:StringCell", - "TemporalCell": "._temporal_cell:TemporalCell", + "StringOperations": "._string_operations:StringOperations", + "TemporalOperations": "._temporal_operations:TemporalOperations", }, ) __all__ = [ - "StringCell", - "TemporalCell", + "StringOperations", + "TemporalOperations", ] diff --git a/src/safeds/data/tabular/query/_lazy_string_cell.py b/src/safeds/data/tabular/query/_lazy_string_operations.py similarity index 96% rename from src/safeds/data/tabular/query/_lazy_string_cell.py rename to src/safeds/data/tabular/query/_lazy_string_operations.py index 57e770087..c44fc6c6b 100644 --- a/src/safeds/data/tabular/query/_lazy_string_cell.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -5,7 +5,8 @@ from safeds._utils import _structural_hash from safeds._validation import _check_bounds, _ClosedBound from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query._string_cell import StringCell + +from ._string_operations import StringOperations if TYPE_CHECKING: import datetime @@ -15,7 +16,7 @@ from safeds.data.tabular.containers._cell import Cell -class _LazyStringCell(StringCell): +class _LazyStringOperations(StringOperations): # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -93,7 +94,7 @@ def trim_start(self) -> Cell[str | None]: # ------------------------------------------------------------------------------------------------------------------ def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyStringCell): + if not isinstance(other, _LazyStringOperations): return NotImplemented if self is other: return True diff --git 
a/src/safeds/data/tabular/query/_lazy_temporal_cell.py b/src/safeds/data/tabular/query/_lazy_temporal_operations.py similarity index 96% rename from src/safeds/data/tabular/query/_lazy_temporal_cell.py rename to src/safeds/data/tabular/query/_lazy_temporal_operations.py index 2a051a329..45bf1e64a 100644 --- a/src/safeds/data/tabular/query/_lazy_temporal_cell.py +++ b/src/safeds/data/tabular/query/_lazy_temporal_operations.py @@ -4,7 +4,8 @@ from safeds._utils import _structural_hash from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query._temporal_cell import TemporalCell + +from ._temporal_operations import TemporalOperations if TYPE_CHECKING: import polars as pl @@ -12,7 +13,7 @@ from safeds.data.tabular.containers._cell import Cell -class _LazyTemporalCell(TemporalCell): +class _LazyTemporalOperations(TemporalOperations): # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ @@ -64,7 +65,7 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: # ------------------------------------------------------------------------------------------------------------------ def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyTemporalCell): + if not isinstance(other, _LazyTemporalOperations): return NotImplemented if self is other: return True diff --git a/src/safeds/data/tabular/query/_string_cell.py b/src/safeds/data/tabular/query/_string_operations.py similarity index 99% rename from src/safeds/data/tabular/query/_string_cell.py rename to src/safeds/data/tabular/query/_string_operations.py index 747fc397d..3ac34e462 100644 --- a/src/safeds/data/tabular/query/_string_cell.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -11,7 +11,7 @@ # TODO: examples with None -class StringCell(ABC): 
+class StringOperations(ABC): """ Namespace for operations on strings. diff --git a/src/safeds/data/tabular/query/_temporal_cell.py b/src/safeds/data/tabular/query/_temporal_operations.py similarity index 99% rename from src/safeds/data/tabular/query/_temporal_cell.py rename to src/safeds/data/tabular/query/_temporal_operations.py index b9947e8c8..157f5ba0c 100644 --- a/src/safeds/data/tabular/query/_temporal_cell.py +++ b/src/safeds/data/tabular/query/_temporal_operations.py @@ -10,7 +10,7 @@ # TODO: Examples with None -class TemporalCell(ABC): +class TemporalOperations(ABC): """ Namespace for operations on temporal data. diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py deleted file mode 100644 index 18d3d0d63..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_cell/test_equals.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any - -import polars as pl -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.query import StringCell -from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_return_whether_two_cells_are_equal(cell1: StringCell, cell2: StringCell, expected: bool) -> None: - assert (cell1._equals(cell2)) == expected - - -def test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyStringCell(pl.col("a")) - assert (cell._equals(cell)) is True - - -@pytest.mark.parametrize( - ("cell", "other"), - [ - (_LazyStringCell(pl.col("a")), None), - (_LazyStringCell(pl.col("a")), Table({})), - ], - ids=[ - "Cell vs. None", - "Cell vs. 
Table", - ], -) -def test_should_return_not_implemented_if_other_is_not_cell(cell: StringCell, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py deleted file mode 100644 index df82c710c..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_cell/test_hash.py +++ /dev/null @@ -1,25 +0,0 @@ -import polars as pl -import pytest - -from safeds.data.tabular.query import StringCell -from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell - - -def test_should_be_deterministic() -> None: - cell = _LazyStringCell(pl.col("a")) - assert hash(cell) == 8162512882156938440 - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_be_good_hash(cell1: StringCell, cell2: StringCell, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_string_operations/__init__.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/__init__.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/__init__.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_contains.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py similarity index 100% rename from 
tests/safeds/data/tabular/query/_lazy_string_cell/test_ends_with.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py new file mode 100644 index 000000000..debdac9e8 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py @@ -0,0 +1,47 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.query import StringOperations +from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("a")), True), + (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_return_whether_two_cells_are_equal( + cell1: StringOperations, + cell2: StringOperations, + expected: bool, +) -> None: + assert (cell1._equals(cell2)) == expected + + +def test_should_return_true_if_objects_are_identical() -> None: + cell = _LazyStringOperations(pl.col("a")) + assert (cell._equals(cell)) is True + + +@pytest.mark.parametrize( + ("cell", "other"), + [ + (_LazyStringOperations(pl.col("a")), None), + (_LazyStringOperations(pl.col("a")), Table({})), + ], + ids=[ + "Cell vs. None", + "Cell vs. 
Table", + ], +) +def test_should_return_not_implemented_if_other_is_not_cell(cell: StringOperations, other: Any) -> None: + assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py new file mode 100644 index 000000000..40cd6f04c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py @@ -0,0 +1,25 @@ +import polars as pl +import pytest + +from safeds.data.tabular.query import StringOperations +from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations + + +def test_should_be_deterministic() -> None: + cell = _LazyStringOperations(pl.col("a")) + assert hash(cell) == 8162512882156938440 + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("a")), True), + (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_be_good_hash(cell1: StringOperations, cell2: StringOperations, expected: bool) -> None: + assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_index_of.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_length.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_replace.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_replace.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py similarity index 54% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py index 9d3822d6e..18c1848fd 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_cell/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py @@ -2,9 +2,9 @@ import polars as pl -from safeds.data.tabular.query._lazy_string_cell import _LazyStringCell +from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyStringCell(pl.col("a")) + cell = _LazyStringOperations(pl.col("a")) assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_starts_with.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_substring.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_date.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_date.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_datetime.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_datetime.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_float.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_float.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_int.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_lowercase.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_to_uppercase.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py diff --git 
a/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_trim.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_end.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_string_cell/test_trim_start.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py deleted file mode 100644 index 257e3368f..000000000 --- a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_equals.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any - -import polars as pl -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.query import TemporalCell -from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("a")), True), - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_return_whether_two_cells_are_equal(cell1: TemporalCell, cell2: TemporalCell, expected: bool) -> None: - assert (cell1._equals(cell2)) == expected - - -def 
test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyTemporalCell(pl.col("a")) - assert (cell._equals(cell)) is True - - -@pytest.mark.parametrize( - ("cell", "other"), - [ - (_LazyTemporalCell(pl.col("a")), None), - (_LazyTemporalCell(pl.col("a")), Table({})), - ], - ids=[ - "Cell vs. None", - "Cell vs. Table", - ], -) -def test_should_return_not_implemented_if_other_is_not_cell(cell: TemporalCell, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py b/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py deleted file mode 100644 index dd9a997f9..000000000 --- a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_hash.py +++ /dev/null @@ -1,25 +0,0 @@ -import polars as pl -import pytest - -from safeds.data.tabular.query import TemporalCell -from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell - - -def test_should_be_deterministic() -> None: - cell = _LazyTemporalCell(pl.col("a")) - assert hash(cell) == 8162512882156938440 - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("a")), True), - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_be_good_hash(cell1: TemporalCell, cell2: TemporalCell, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/__init__.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/__init__.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/__init__.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_century.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_century.py similarity 
index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_century.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_century.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_date_to_string.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_date_to_string.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_date_to_string.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_date_to_string.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_datetime_to_string.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_datetime_to_string.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_datetime_to_string.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_datetime_to_string.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_day.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_day.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_day.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_day.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py new file mode 100644 index 000000000..13b85af1a --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py @@ -0,0 +1,47 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("a")), True), + 
(_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_return_whether_two_cells_are_equal( + cell1: TemporalOperations, + cell2: TemporalOperations, + expected: bool, +) -> None: + assert (cell1._equals(cell2)) == expected + + +def test_should_return_true_if_objects_are_identical() -> None: + cell = _LazyTemporalOperations(pl.col("a")) + assert (cell._equals(cell)) is True + + +@pytest.mark.parametrize( + ("cell", "other"), + [ + (_LazyTemporalOperations(pl.col("a")), None), + (_LazyTemporalOperations(pl.col("a")), Table({})), + ], + ids=[ + "Cell vs. None", + "Cell vs. Table", + ], +) +def test_should_return_not_implemented_if_other_is_not_cell(cell: TemporalOperations, other: Any) -> None: + assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py new file mode 100644 index 000000000..db26be9f4 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py @@ -0,0 +1,25 @@ +import polars as pl +import pytest + +from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations + + +def test_should_be_deterministic() -> None: + cell = _LazyTemporalOperations(pl.col("a")) + assert hash(cell) == 8162512882156938440 + + +@pytest.mark.parametrize( + ("cell1", "cell2", "expected"), + [ + (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("a")), True), + (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("b")), False), + ], + ids=[ + "equal", + "different", + ], +) +def test_should_be_good_hash(cell1: TemporalOperations, cell2: TemporalOperations, expected: bool) -> None: + assert (hash(cell1) == hash(cell2)) == expected diff --git 
a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_month.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_month.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_month.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_month.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py similarity index 53% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py index 1bf058b31..f034834b0 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py @@ -2,9 +2,9 @@ import polars as pl -from safeds.data.tabular.query._lazy_temporal_cell import _LazyTemporalCell +from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyTemporalCell(pl.col("a")) + cell = _LazyTemporalOperations(pl.col("a")) assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_week.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_week.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_week.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_week.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_weekday.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_weekday.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_weekday.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_weekday.py diff --git 
a/tests/safeds/data/tabular/query/_lazy_temporal_cell/test_year.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_year.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_cell/test_year.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_year.py From 160c334f3cb8d70bf39e9777e7fa8109ea62f1f8 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 19:27:36 +0100 Subject: [PATCH 03/57] chore: more readable output of `_LazyCell.__repr__` --- src/safeds/data/tabular/containers/_lazy_cell.py | 2 +- .../safeds/data/tabular/containers/_lazy_cell/test_repr.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index 0d770a117..6f5190104 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -166,7 +166,7 @@ def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) def __repr__(self) -> str: - return self._expression.__repr__() + return f"_LazyCell({self._expression})" def __sizeof__(self) -> int: return self._expression.__sizeof__() diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py index 91313f5ad..44aad275e 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py @@ -10,11 +10,11 @@ [ ( Cell.constant(1), - "dyn int: 1", + "_LazyCell(dyn int: 1)", ), ( _LazyCell(pl.col("a")), - 'col("a")', + '_LazyCell(col("a"))', ), ], ids=[ @@ -24,4 +24,4 @@ ) def test_should_return_a_string_representation(cell: Cell, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging - assert expected in repr(cell) + assert repr(cell) == expected From 
fd3f1c1a73ddf06f2f64dc5cad935d8a5dff07ca Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 19:37:09 +0100 Subject: [PATCH 04/57] chore: add `__eq__`, `__repr__`, and `__str__` methods --- .../tabular/query/_lazy_string_operations.py | 24 +++++++------ .../query/_lazy_temporal_operations.py | 24 +++++++------ .../data/tabular/query/_string_operations.py | 35 ++++++++++++------- .../tabular/query/_temporal_operations.py | 35 ++++++++++++------- .../{test_equals.py => test_eq.py} | 6 ++-- .../_lazy_string_operations/test_repr.py | 28 +++++++++++++++ .../query/_lazy_string_operations/test_str.py | 28 +++++++++++++++ .../{test_equals.py => test_eq.py} | 6 ++-- .../_lazy_temporal_operations/test_repr.py | 28 +++++++++++++++ .../_lazy_temporal_operations/test_str.py | 28 +++++++++++++++ 10 files changed, 190 insertions(+), 52 deletions(-) rename tests/safeds/data/tabular/query/_lazy_string_operations/{test_equals.py => test_eq.py} (89%) create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py create mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py rename tests/safeds/data/tabular/query/_lazy_temporal_operations/{test_equals.py => test_eq.py} (89%) create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index c44fc6c6b..e0c603984 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -24,12 +24,25 @@ class _LazyStringOperations(StringOperations): def __init__(self, expression: pl.Expr) -> None: self._expression: pl.Expr = expression + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyStringOperations): + return NotImplemented + if self is other: 
+ return True + return self._expression.meta.eq(other._expression) + def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) + def __repr__(self) -> str: + return f"_LazyStringOperations({self._expression})" + def __sizeof__(self) -> int: return self._expression.__sizeof__() + def __str__(self) -> str: + return f"({self._expression}).str" + # ------------------------------------------------------------------------------------------------------------------ # String operations # ------------------------------------------------------------------------------------------------------------------ @@ -88,14 +101,3 @@ def trim_end(self) -> Cell[str | None]: def trim_start(self) -> Cell[str | None]: return _LazyCell(self._expression.str.strip_chars_start()) - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyStringOperations): - return NotImplemented - if self is other: - return True - return self._expression.meta.eq(other._expression) diff --git a/src/safeds/data/tabular/query/_lazy_temporal_operations.py b/src/safeds/data/tabular/query/_lazy_temporal_operations.py index 45bf1e64a..829fa76ec 100644 --- a/src/safeds/data/tabular/query/_lazy_temporal_operations.py +++ b/src/safeds/data/tabular/query/_lazy_temporal_operations.py @@ -21,12 +21,25 @@ class _LazyTemporalOperations(TemporalOperations): def __init__(self, expression: pl.Expr) -> None: self._expression: pl.Expr = expression + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyTemporalOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) 
+ def __repr__(self) -> str: + return f"_LazyTemporalOperations({self._expression})" + def __sizeof__(self) -> int: return self._expression.__sizeof__() + def __str__(self) -> str: + return f"({self._expression}).dt" + # ------------------------------------------------------------------------------------------------------------------ # Temporal operations # ------------------------------------------------------------------------------------------------------------------ @@ -60,17 +73,6 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: raise ValueError("Invalid format string") return _LazyCell(self._expression.dt.to_string(format=format_string)) - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyTemporalOperations): - return NotImplemented - if self is other: - return True - return self._expression.meta.eq(other._expression) - def _check_format_string(format_string: str) -> bool: valid_format_codes = { diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 3ac34e462..10e9b2493 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -33,6 +33,29 @@ class StringOperations(ABC): +---------+ """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... 
+ + @abstractmethod + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # String operations + # ------------------------------------------------------------------------------------------------------------------ + @abstractmethod def contains(self, substring: str) -> Cell[bool | None]: """ @@ -493,15 +516,3 @@ def trim_start(self) -> Cell[str | None]: | abc | +---------+ """ - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - @abstractmethod - def _equals(self, other: object) -> bool: - """ - Check if this cell is equal to another object. - - This method is needed because the `__eq__` method is used for element-wise comparisons. - """ diff --git a/src/safeds/data/tabular/query/_temporal_operations.py b/src/safeds/data/tabular/query/_temporal_operations.py index 157f5ba0c..3889735a6 100644 --- a/src/safeds/data/tabular/query/_temporal_operations.py +++ b/src/safeds/data/tabular/query/_temporal_operations.py @@ -31,6 +31,29 @@ class TemporalOperations(ABC): +------------+ """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... 
+ + # ------------------------------------------------------------------------------------------------------------------ + # String operations + # ------------------------------------------------------------------------------------------------------------------ + @abstractmethod def century(self) -> Cell[int | None]: """ @@ -243,15 +266,3 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: | 2022-01-09 | +------------+ """ - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - @abstractmethod - def _equals(self, other: object) -> bool: - """ - Check if this cell is equal to another object. - - This method is needed because the `__eq__` method is used for element-wise comparisons. - """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py similarity index 89% rename from tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py index debdac9e8..1134d8456 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_equals.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py @@ -24,12 +24,12 @@ def test_should_return_whether_two_cells_are_equal( cell2: StringOperations, expected: bool, ) -> None: - assert (cell1._equals(cell2)) == expected + assert (cell1.__eq__(cell2)) == expected def test_should_return_true_if_objects_are_identical() -> None: cell = _LazyStringOperations(pl.col("a")) - assert (cell._equals(cell)) is True + assert (cell.__eq__(cell)) is True @pytest.mark.parametrize( @@ -44,4 +44,4 @@ def test_should_return_true_if_objects_are_identical() -> None: ], ) def 
test_should_return_not_implemented_if_other_is_not_cell(cell: StringOperations, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented + assert (cell.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py new file mode 100644 index 000000000..5265d7cf6 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.constant("a").str, + "_LazyStringOperations(String(a))", + ), + ( + _LazyCell(pl.col("a")).str, + '_LazyStringOperations(col("a"))', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(cell: StringOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(cell) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py new file mode 100644 index 000000000..9d935162c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.constant("a").str, + "(String(a)).str", + ), + ( + _LazyCell(pl.col("a")).str, + '(col("a")).str', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(cell: StringOperations, expected: 
str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(cell) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py similarity index 89% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py rename to tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py index 13b85af1a..71ed031b6 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_equals.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py @@ -24,12 +24,12 @@ def test_should_return_whether_two_cells_are_equal( cell2: TemporalOperations, expected: bool, ) -> None: - assert (cell1._equals(cell2)) == expected + assert (cell1.__eq__(cell2)) == expected def test_should_return_true_if_objects_are_identical() -> None: cell = _LazyTemporalOperations(pl.col("a")) - assert (cell._equals(cell)) is True + assert (cell.__eq__(cell)) is True @pytest.mark.parametrize( @@ -44,4 +44,4 @@ def test_should_return_true_if_objects_are_identical() -> None: ], ) def test_should_return_not_implemented_if_other_is_not_cell(cell: TemporalOperations, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented + assert (cell.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py new file mode 100644 index 000000000..d46dbd1fa --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import TemporalOperations + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.duration(hours=1).dt, + 
'_LazyTemporalOperations(1h.alias("duration"))', + ), + ( + _LazyCell(pl.col("a")).dt, + '_LazyTemporalOperations(col("a"))', + ), + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_a_string_representation(cell: TemporalOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(cell) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py new file mode 100644 index 000000000..e4915b4ea --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import TemporalOperations + + +@pytest.mark.parametrize( + ("cell", "expected"), + [ + ( + Cell.duration(hours=1).dt, + '(1h.alias("duration")).dt', + ), + ( + _LazyCell(pl.col("a")).dt, + '(col("a")).dt', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(cell: TemporalOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(cell) == expected From 5883d5466d4011a1628d884044bf2cb83c8b944f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 19:42:52 +0100 Subject: [PATCH 05/57] chore: check `__sizeof__` --- .../_lazy_string_operations/test_repr.py | 6 +++--- .../_lazy_string_operations/test_sizeof.py | 21 +++++++++++++++---- .../query/_lazy_string_operations/test_str.py | 6 +++--- .../_lazy_temporal_operations/test_repr.py | 6 +++--- .../_lazy_temporal_operations/test_sizeof.py | 21 +++++++++++++++---- .../_lazy_temporal_operations/test_str.py | 6 +++--- 6 files changed, 46 insertions(+), 20 deletions(-) diff --git 
a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py index 5265d7cf6..757dd1aa1 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( - ("cell", "expected"), + ("ops", "expected"), [ ( Cell.constant("a").str, @@ -23,6 +23,6 @@ "column", ], ) -def test_should_return_a_string_representation(cell: StringOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: StringOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging - assert repr(cell) == expected + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py index 18c1848fd..8293e5f0b 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py @@ -1,10 +1,23 @@ import sys import polars as pl +import pytest -from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations -def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyStringOperations(pl.col("a")) - assert sys.getsizeof(cell) > sys.getsizeof(object()) +@pytest.mark.parametrize( + "ops", + [ + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: StringOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py index 9d935162c..b23027814 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( - ("cell", "expected"), + ("ops", "expected"), [ ( Cell.constant("a").str, @@ -23,6 +23,6 @@ "column", ], ) -def test_should_return_a_string_representation(cell: StringOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: StringOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging - assert str(cell) == expected + assert str(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py index d46dbd1fa..1b78568f2 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( - ("cell", "expected"), + ("ops", "expected"), [ ( Cell.duration(hours=1).dt, @@ -23,6 +23,6 @@ "column", ], ) -def test_should_return_a_string_representation(cell: TemporalOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: TemporalOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging - assert repr(cell) == expected + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py index f034834b0..9907cf70a 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py @@ -1,10 +1,23 @@ import sys import polars as pl +import pytest -from 
safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import TemporalOperations -def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyTemporalOperations(pl.col("a")) - assert sys.getsizeof(cell) > sys.getsizeof(object()) +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dt, + _LazyCell(pl.col("a")).dt, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: TemporalOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py index e4915b4ea..3fa78713c 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( - ("cell", "expected"), + ("ops", "expected"), [ ( Cell.duration(hours=1).dt, @@ -23,6 +23,6 @@ "column", ], ) -def test_should_return_a_string_representation(cell: TemporalOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: TemporalOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging - assert str(cell) == expected + assert str(ops) == expected From ace9ee87c478659f2f8a7444fa82effcbc35619e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 19:58:51 +0100 Subject: [PATCH 06/57] chore: check `__hash__` --- .../__snapshots__/test_hash.ambr | 7 +++ .../_lazy_string_operations/test_hash.py | 61 +++++++++++++++---- .../__snapshots__/test_hash.ambr | 7 +++ .../_lazy_temporal_operations/test_hash.py | 61 +++++++++++++++---- 4 files changed, 114 insertions(+), 22 deletions(-) create mode 
100644 tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr create mode 100644 tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..cdae41257 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[constant] + 6067426592045063520 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py index 40cd6f04c..837f1bf32 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py @@ -1,25 +1,64 @@ +from collections.abc import Callable + import polars as pl import pytest +from syrupy import SnapshotAssertion +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import StringOperations -from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations -def test_should_be_deterministic() -> None: - cell = _LazyStringOperations(pl.col("a")) - assert hash(cell) == 8162512882156938440 +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.constant("a").str, + lambda: _LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], StringOperations]) -> None: + ops_1 = ops_factory() + ops_2 = 
ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], StringOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("ops_1", "ops_2"), [ - (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("a")), True), - (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("b")), False), + # different constant value + ( + Cell.constant("a").str, + Cell.constant("b").str, + ), + # different column + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("b")).str, + ), + # different cell kinds + ( + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + ), ], ids=[ - "equal", - "different", + "different constant value", + "different column", + "different cell kinds", ], ) -def test_should_be_good_hash(cell1: StringOperations, cell2: StringOperations, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected +def test_should_be_good_hash(ops_1: StringOperations, ops_2: StringOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..99b9cb6b3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[duration] + 2005674043565732975 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py index db26be9f4..75d5d5d96 100644 --- 
a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py @@ -1,25 +1,64 @@ +from collections.abc import Callable + import polars as pl import pytest +from syrupy import SnapshotAssertion +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import TemporalOperations -from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations -def test_should_be_deterministic() -> None: - cell = _LazyTemporalOperations(pl.col("a")) - assert hash(cell) == 8162512882156938440 +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.duration(hours=1).dt, + lambda: _LazyCell(pl.col("a")).dt, + ], + ids=[ + "duration", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], TemporalOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], TemporalOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("ops_1", "ops_2"), [ - (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("a")), True), - (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("b")), False), + # different durations + ( + Cell.duration(hours=1).dt, + Cell.duration(hours=2).dt, + ), + # different column + ( + _LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("b")).dt, + ), + # different cell kinds + ( + Cell.duration(hours=1).dt, + _LazyCell(pl.col("a")).dt, + ), ], ids=[ - "equal", - "different", + "different durations", + "different column", + "different cell kinds", ], ) -def test_should_be_good_hash(cell1: TemporalOperations, cell2: 
TemporalOperations, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected +def test_should_be_good_hash(ops_1: TemporalOperations, ops_2: TemporalOperations) -> None: + assert hash(ops_1) != hash(ops_2) From 422717494f59de951cac5c749198513299d3a96f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Wed, 15 Jan 2025 20:11:20 +0100 Subject: [PATCH 07/57] chore: check `__eq__` --- .../containers/_lazy_cell/test_equals.py | 20 ++--- .../containers/_lazy_cell/test_hash.py | 20 ++--- .../query/_lazy_string_operations/test_eq.py | 85 ++++++++++++++----- .../_lazy_string_operations/test_hash.py | 8 +- .../_lazy_temporal_operations/test_eq.py | 85 ++++++++++++++----- .../_lazy_temporal_operations/test_hash.py | 4 +- 6 files changed, 154 insertions(+), 68 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py index 023cdf615..df3bb8f83 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py @@ -34,31 +34,31 @@ _LazyCell(pl.col("a")), True, ), - # not equal (different constant value) + # not equal (different constant values) ( Cell.constant(1), Cell.constant(2), False, ), - # not equal (different constant type) + # not equal (different constant types) ( Cell.constant(1), Cell.constant("1"), False, ), - # not equal (different date, int) + # not equal (different dates, int) ( Cell.date(2025, 1, 15), Cell.date(2024, 1, 15), False, ), - # not equal (different date, column) + # not equal (different dates, column) ( Cell.date(_LazyCell(pl.col("a")), 1, 15), Cell.date(_LazyCell(pl.col("b")), 1, 15), False, ), - # not equal (different column) + # not equal (different columns) ( _LazyCell(pl.col("a")), _LazyCell(pl.col("b")), @@ -78,11 +78,11 @@ "equal (date, column)", "equal (column)", # Not equal - "not equal (different constant value)", - "not equal (different constant type)", 
- "not equal (different date, int)", - "not equal (different date, column)", - "not equal (different column)", + "not equal (different constant values)", + "not equal (different constant types)", + "not equal (different dates, int)", + "not equal (different dates, column)", + "not equal (different columns)", "not equal (different cell kinds)", ], ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py index 7c5a6d312..55c00fa4f 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py @@ -41,27 +41,27 @@ def test_should_return_same_hash_in_different_processes( @pytest.mark.parametrize( ("cell_1", "cell_2"), [ - # different constant value + # different constant values ( Cell.constant(1), Cell.constant(2), ), - # different constant type + # different constant types ( Cell.constant(1), Cell.constant("1"), ), - # different date, int + # different dates, int ( Cell.date(2025, 1, 15), Cell.date(2024, 1, 15), ), - # different date, column + # different dates, column ( Cell.date(_LazyCell(pl.col("a")), 1, 15), Cell.date(_LazyCell(pl.col("b")), 1, 15), ), - # different column + # different columns ( _LazyCell(pl.col("a")), _LazyCell(pl.col("b")), @@ -73,11 +73,11 @@ def test_should_return_same_hash_in_different_processes( ), ], ids=[ - "different constant value", - "different constant type", - "different date, int", - "different date, column", - "different column", + "different constant values", + "different constant types", + "different dates, int", + "different dates, column", + "different columns", "different cell kinds", ], ) diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py index 1134d8456..4d6ce2779 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py +++ 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py @@ -3,45 +3,88 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Table +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import StringOperations -from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("ops_1", "ops_2", "expected"), [ - (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("a")), True), - (_LazyStringOperations(pl.col("a")), _LazyStringOperations(pl.col("b")), False), + # equal (constant) + ( + Cell.constant("a").str, + Cell.constant("a").str, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("a")).str, + True, + ), + # not equal (different constant values) + ( + Cell.constant("a").str, + Cell.constant("b").str, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("b")).str, + False, + ), + # not equal (different cell kinds) + ( + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + False, + ), ], ids=[ - "equal", - "different", + # Equal + "equal (constant)", + "equal (column)", + # Not equal + "not equal (different constant values)", + "not equal (different columns)", + "not equal (different cell kinds)", ], ) -def test_should_return_whether_two_cells_are_equal( - cell1: StringOperations, - cell2: StringOperations, +def test_should_return_whether_objects_are_equal( + ops_1: StringOperations, + ops_2: StringOperations, expected: bool, ) -> None: - assert (cell1.__eq__(cell2)) == expected + assert (ops_1.__eq__(ops_2)) == expected -def test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyStringOperations(pl.col("a")) - assert (cell.__eq__(cell)) is True +@pytest.mark.parametrize( + "ops", + [ + Cell.constant("a").str, + 
_LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: StringOperations) -> None: + assert (ops.__eq__(ops)) is True @pytest.mark.parametrize( - ("cell", "other"), + ("ops", "other"), [ - (_LazyStringOperations(pl.col("a")), None), - (_LazyStringOperations(pl.col("a")), Table({})), + (Cell.constant("a").str, None), + (Cell.constant("a").str, Column("col1", [1])), ], ids=[ - "Cell vs. None", - "Cell vs. Table", + "StringOperations vs. None", + "StringOperations vs. Column", ], ) -def test_should_return_not_implemented_if_other_is_not_cell(cell: StringOperations, other: Any) -> None: - assert (cell.__eq__(other)) is NotImplemented +def test_should_return_not_implemented_if_other_has_different_type(ops: StringOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py index 837f1bf32..66de2a7bf 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py @@ -38,12 +38,12 @@ def test_should_return_same_hash_in_different_processes( @pytest.mark.parametrize( ("ops_1", "ops_2"), [ - # different constant value + # different constant values ( Cell.constant("a").str, Cell.constant("b").str, ), - # different column + # different columns ( _LazyCell(pl.col("a")).str, _LazyCell(pl.col("b")).str, @@ -55,8 +55,8 @@ def test_should_return_same_hash_in_different_processes( ), ], ids=[ - "different constant value", - "different column", + "different constant values", + "different columns", "different cell kinds", ], ) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py index 71ed031b6..87bff8c16 100644 --- 
a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py @@ -3,45 +3,88 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Table +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import TemporalOperations -from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations @pytest.mark.parametrize( - ("cell1", "cell2", "expected"), + ("ops_1", "ops_2", "expected"), [ - (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("a")), True), - (_LazyTemporalOperations(pl.col("a")), _LazyTemporalOperations(pl.col("b")), False), + # equal (duration) + ( + Cell.duration(hours=1).dt, + Cell.duration(hours=1).dt, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("a")).dt, + True, + ), + # not equal (different durations) + ( + Cell.duration(hours=1).dt, + Cell.duration(hours=2).dt, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("b")).dt, + False, + ), + # not equal (different cell kinds) + ( + Cell.duration(hours=1).dt, + _LazyCell(pl.col("a")).dt, + False, + ), ], ids=[ - "equal", - "different", + # Equal + "equal (duration)", + "equal (column)", + # Not equal + "not equal (different durations)", + "not equal (different columns)", + "not equal (different cell kinds)", ], ) -def test_should_return_whether_two_cells_are_equal( - cell1: TemporalOperations, - cell2: TemporalOperations, +def test_should_return_whether_objects_are_equal( + ops_1: TemporalOperations, + ops_2: TemporalOperations, expected: bool, ) -> None: - assert (cell1.__eq__(cell2)) == expected + assert (ops_1.__eq__(ops_2)) == expected -def test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyTemporalOperations(pl.col("a")) - assert (cell.__eq__(cell)) 
is True +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dt, + _LazyCell(pl.col("a")).dt, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: TemporalOperations) -> None: + assert (ops.__eq__(ops)) is True @pytest.mark.parametrize( - ("cell", "other"), + ("ops", "other"), [ - (_LazyTemporalOperations(pl.col("a")), None), - (_LazyTemporalOperations(pl.col("a")), Table({})), + (Cell.duration(hours=1).dt, None), + (Cell.duration(hours=1).dt, Column("col1", [1])), ], ids=[ - "Cell vs. None", - "Cell vs. Table", + "TemporalOperations vs. None", + "TemporalOperations vs. Column", ], ) -def test_should_return_not_implemented_if_other_is_not_cell(cell: TemporalOperations, other: Any) -> None: - assert (cell.__eq__(other)) is NotImplemented +def test_should_return_not_implemented_if_other_has_different_type(ops: TemporalOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py index 75d5d5d96..78323a03e 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py @@ -43,7 +43,7 @@ def test_should_return_same_hash_in_different_processes( Cell.duration(hours=1).dt, Cell.duration(hours=2).dt, ), - # different column + # different columns ( _LazyCell(pl.col("a")).dt, _LazyCell(pl.col("b")).dt, @@ -56,7 +56,7 @@ def test_should_return_same_hash_in_different_processes( ], ids=[ "different durations", - "different column", + "different columns", "different cell kinds", ], ) From 3788c78e6f0beba28b455232eb3aa5e203c928cb Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 12:41:34 +0100 Subject: [PATCH 08/57] feat: remove unneeded trailing underscores from parameter names of public functions --- 
docs/tutorials/data_processing.ipynb | 58 ++----------------- pyproject.toml | 2 + src/safeds/data/tabular/containers/_column.py | 8 +-- src/safeds/data/tabular/containers/_table.py | 6 +- .../tabular/transformation/_range_scaler.py | 14 ++--- tests/helpers/_assertions.py | 2 +- .../tabular/containers/_column/test_init.py | 4 +- .../transformation/test_range_scaler.py | 6 +- 8 files changed, 26 insertions(+), 74 deletions(-) diff --git a/docs/tutorials/data_processing.ipynb b/docs/tutorials/data_processing.ipynb index c73d6162a..38ea4ce66 100644 --- a/docs/tutorials/data_processing.ipynb +++ b/docs/tutorials/data_processing.ipynb @@ -688,64 +688,14 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2024-05-24T11:02:33.599165800Z", - "start_time": "2024-05-24T11:02:33.479893800Z" - }, - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 12)
idnamesexagesiblings_spousesparents_childrentickettravel_classfarecabinport_embarkedsurvived
i64strstrf64i64i64stri64f64strstri64
0"Abbing, Mr. Anthony""male"0.52400800"C.A. 5547"37.55null"Southampton"0
1"Abbott, Master. Eugene Joseph""male"0.16075102"C.A. 2673"320.25null"Southampton"0
2"Abbott, Mr. Rossmore Edward""male"0.1983311"C.A. 2673"320.25null"Southampton"0
3"Abbott, Mrs. Stanton (Rosa Hun…"female"0.43632511"C.A. 2673"320.25null"Southampton"1
4"Abelseth, Miss. Karen Marie""female"0.1983300"348125"37.65null"Southampton"1
5"Abelseth, Mr. Olaus Jorgensen""male"0.31106400"348122"37.65"F G63""Southampton"1
6"Abelson, Mr. Samuel""male"0.37369510"P/PP 3381"224.0null"Cherbourg"0
7"Abelson, Mrs. Samuel (Hannah W…"female"0.34864310"P/PP 3381"224.0null"Cherbourg"1
8"Abrahamsson, Mr. Abraham Augus…"male"0.24843400"SOTON/O2 3101284"37.925null"Southampton"1
9"Abrahim, Mrs. Joseph (Sophie H…"female"0.22338200"2657"37.2292null"Cherbourg"1
" - ], - "text/plain": [ - "+-----+-----------------------+--------+---------+---+----------+-------+---------------+----------+\n", - "| id | name | sex | age | … | fare | cabin | port_embarked | survived |\n", - "| --- | --- | --- | --- | | --- | --- | --- | --- |\n", - "| i64 | str | str | f64 | | f64 | str | str | i64 |\n", - "+==================================================================================================+\n", - "| 0 | Abbing, Mr. Anthony | male | 0.52401 | … | 7.55000 | null | Southampton | 0 |\n", - "| 1 | Abbott, Master. | male | 0.16075 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Eugene Joseph | | | | | | | |\n", - "| 2 | Abbott, Mr. Rossmore | male | 0.19833 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Edward | | | | | | | |\n", - "| 3 | Abbott, Mrs. Stanton | female | 0.43633 | … | 20.25000 | null | Southampton | 1 |\n", - "| | (Rosa Hun… | | | | | | | |\n", - "| 4 | Abelseth, Miss. Karen | female | 0.19833 | … | 7.65000 | null | Southampton | 1 |\n", - "| | Marie | | | | | | | |\n", - "| 5 | Abelseth, Mr. Olaus | male | 0.31106 | … | 7.65000 | F G63 | Southampton | 1 |\n", - "| | Jorgensen | | | | | | | |\n", - "| 6 | Abelson, Mr. Samuel | male | 0.37369 | … | 24.00000 | null | Cherbourg | 0 |\n", - "| 7 | Abelson, Mrs. Samuel | female | 0.34864 | … | 24.00000 | null | Cherbourg | 1 |\n", - "| | (Hannah W… | | | | | | | |\n", - "| 8 | Abrahamsson, Mr. | male | 0.24843 | … | 7.92500 | null | Southampton | 1 |\n", - "| | Abraham Augus… | | | | | | | |\n", - "| 9 | Abrahim, Mrs. 
Joseph | female | 0.22338 | … | 7.22920 | null | Cherbourg | 1 |\n", - "| | (Sophie H… | | | | | | | |\n", - "+-----+-----------------------+--------+---------+---+----------+-------+---------------+----------+" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "execution_count": null, "source": [ "from safeds.data.tabular.transformation import RangeScaler\n", "\n", - "scaler = RangeScaler(selector=\"age\", min_=0.0, max_=1.0).fit(titanic)\n", + "scaler = RangeScaler(selector=\"age\", min=0.0, max=1.0).fit(titanic)\n", "scaler.transform(titanic_slice)" ] }, diff --git a/pyproject.toml b/pyproject.toml index 1b7df81e4..f2bf9aad1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,6 +146,8 @@ ignore = [ "FBT001", # boolean-default-value-in-function-definition (we leave it to the call-site) "FBT002", + # builtin-argument-shadowing (we want readable parameter names in our API) + "A002", # builtin-attribute-shadowing (not an issue) "A003", # implicit-return (can add a return even though all cases are covered) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 2817a008a..85ac6d44e 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -44,7 +44,7 @@ class Column(Sequence[T_co]): The name of the column. data: The data of the column. - type_: + type: The type of the column. If `None` (default), the type is inferred from the data. 
Examples @@ -62,7 +62,7 @@ class Column(Sequence[T_co]): +-----+ >>> from safeds.data.tabular.typing import ColumnType - >>> Column("a", [1, 2, 3], type_=ColumnType.string()) + >>> Column("a", [1, 2, 3], type=ColumnType.string()) +-----+ | a | | --- | @@ -93,12 +93,12 @@ def __init__( name: str, data: Sequence[T_co], *, - type_: ColumnType | None = None, + type: ColumnType | None = None, ) -> None: import polars as pl # Preprocessing - dtype = None if type_ is None else type_._polars_data_type + dtype = None if type is None else type._polars_data_type # Implementation self._series: pl.Series = pl.Series(name, data, dtype=dtype, strict=False) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index f66cede68..9cb1b8fb1 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2209,7 +2209,7 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer >>> from safeds.data.tabular.containers import Table >>> from safeds.data.tabular.transformation import RangeScaler >>> table = Table({"a": [1, 2, 3]}) - >>> transformer, transformed_table = RangeScaler(min_=0, max_=1).fit_and_transform(table) + >>> transformer, transformed_table = RangeScaler(min=0, max=1).fit_and_transform(table) >>> transformed_table.inverse_transform_table(transformer) +---------+ | a | @@ -2405,7 +2405,7 @@ def transform_table(self, fitted_transformer: TableTransformer) -> Table: >>> from safeds.data.tabular.containers import Table >>> from safeds.data.tabular.transformation import RangeScaler >>> table = Table({"a": [1, 2, 3]}) - >>> transformer = RangeScaler(min_=0, max_=1).fit(table) + >>> transformer = RangeScaler(min=0, max=1).fit(table) >>> table.transform_table(transformer) +---------+ | a | @@ -2591,7 +2591,7 @@ def to_dict(self) -> dict[str, list[Any]]: Returns ------- - dict_: + dict: The dictionary representation of the table. 
Examples diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 54def0379..005d77a44 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -19,9 +19,9 @@ class RangeScaler(InvertibleTableTransformer): Parameters ---------- - min_: + min: The minimum of the new range after the transformation - max_: + max: The maximum of the new range after the transformation selector: The list of columns used to fit the transformer. If `None`, all numeric columns are used. @@ -36,15 +36,15 @@ class RangeScaler(InvertibleTableTransformer): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, *, selector: str | list[str] | None = None, min_: float = 0.0, max_: float = 1.0) -> None: + def __init__(self, *, selector: str | list[str] | None = None, min: float = 0.0, max: float = 1.0) -> None: super().__init__(selector) - if min_ >= max_: + if min >= max: raise ValueError('Parameter "max_" must be greater than parameter "min_".') # Parameters - self._min: float = min_ - self._max: float = max_ + self._min: float = min + self._max: float = max # Internal state self._data_min: pl.DataFrame | None = None @@ -121,7 +121,7 @@ def fit(self, table: Table) -> RangeScaler: _data_max = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).max()) # Create a copy with the learned transformation - result = RangeScaler(min_=self._min, max_=self._max, selector=column_names) + result = RangeScaler(min=self._min, max=self._max, selector=column_names) result._data_min = _data_min result._data_max = _data_max diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 2f7397f9f..773abb22c 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -84,7 +84,7 @@ def assert_cell_operation_works( The type 
of the column if the value is `None`. """ type_ = type_if_none if value is None else None - column = Column("A", [value], type_=type_) + column = Column("A", [value], type=type_) transformed_column = column.transform(transformer) actual = transformed_column[0] assert actual == expected, f"Expected {expected}, but got {actual}." diff --git a/tests/safeds/data/tabular/containers/_column/test_init.py b/tests/safeds/data/tabular/containers/_column/test_init.py index fa45c4dcf..b64b93759 100644 --- a/tests/safeds/data/tabular/containers/_column/test_init.py +++ b/tests/safeds/data/tabular/containers/_column/test_init.py @@ -14,7 +14,7 @@ def test_should_store_the_name() -> None: [ (Column("col1", []), []), (Column("col1", [1]), [1]), - (Column("col1", [1], type_=ColumnType.string()), ["1"]), + (Column("col1", [1], type=ColumnType.string()), ["1"]), ], ids=[ "empty", @@ -31,7 +31,7 @@ def test_should_store_the_data(column: Column, expected: list) -> None: [ (Column("col1", []), ColumnType.null()), (Column("col1", [1]), ColumnType.int64()), - (Column("col1", [1], type_=ColumnType.string()), ColumnType.string()), + (Column("col1", [1], type=ColumnType.string()), ColumnType.string()), ], ids=[ "empty", diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 55a2fcfb1..1853bb291 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -8,7 +8,7 @@ class TestInit: def test_should_raise_value_error(self) -> None: with pytest.raises(ValueError, match='Parameter "max_" must be greater than parameter "min_".'): - _ = RangeScaler(min_=10, max_=0) + _ = RangeScaler(min=10, max=0) class TestFit: @@ -187,8 +187,8 @@ def test_should_return_fitted_transformer_and_transformed_table_with_correct_ran expected: Table, ) -> None: fitted_transformer, transformed_table = RangeScaler( - min_=-10.0, - max_=10.0, + 
min=-10.0, + max=10.0, selector=column_names, ).fit_and_transform( table, From c423d356063651e7297fe6d8a58cef42b3e9cb58 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 14:49:18 +0100 Subject: [PATCH 09/57] feat: update namespaces for cell operations --- src/safeds/data/tabular/containers/_cell.py | 208 +++++++++--------- .../data/tabular/containers/_lazy_cell.py | 164 +++++++------- src/safeds/data/tabular/query/__init__.py | 12 +- .../tabular/containers/_lazy_cell/test_abs.py | 9 +- .../containers/_lazy_cell/test_ceil.py | 9 +- .../containers/_lazy_cell/test_floor.py | 9 +- .../_lazy_duration_operations/__init__.py | 0 .../query/_lazy_math_operations/__init__.py | 0 8 files changed, 206 insertions(+), 205 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/__init__.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/__init__.py diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 8d6e8e964..3326acc4e 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -14,7 +14,7 @@ _ConvertibleToIntCell, _PythonLiteral, ) - from safeds.data.tabular.query import StringOperations, TemporalOperations + from safeds.data.tabular.query import DatetimeOperations, DurationOperations, MathOperations, StringOperations from safeds.data.tabular.typing import ColumnType T_co = TypeVar("T_co", covariant=True) @@ -25,7 +25,16 @@ class Cell(ABC, Generic[T_co]): """ A single value in a table. - You only need to interact with this class in callbacks passed to higher-order functions. + You only need to interact with this class in callbacks passed to higher-order functions. 
Most operations are grouped + into namespaces, which are accessed through the following attributes: + + -`dt` (operations on datetime/date/time values) + -`dur` (operations on durations) + -`math` (mathematical operations on numbers) + -`str` (operations on strings) + + This class only has methods that are not specific to a data type (e.g. `cast`), and methods with corresponding + operators (e.g. `add` for `+`). """ # ------------------------------------------------------------------------------------------------------------------ @@ -125,9 +134,9 @@ def date( return _LazyCell( pl.date( - year=_unwrap(year), - month=_unwrap(month), - day=_unwrap(day), + year=_to_polars_expression(year), + month=_to_polars_expression(month), + day=_to_polars_expression(day), ), ) @@ -199,13 +208,13 @@ def datetime( from ._lazy_cell import _LazyCell # circular import - pl_year = _unwrap(year) - pl_month = _unwrap(month) - pl_day = _unwrap(day) - pl_hour = _unwrap(hour) - pl_minute = _unwrap(minute) - pl_second = _unwrap(second) - pl_microsecond = _unwrap(microsecond) + pl_year = _to_polars_expression(year) + pl_month = _to_polars_expression(month) + pl_day = _to_polars_expression(day) + pl_hour = _to_polars_expression(hour) + pl_minute = _to_polars_expression(minute) + pl_second = _to_polars_expression(second) + pl_microsecond = _to_polars_expression(microsecond) # By default, microseconds overflow into seconds return _LazyCell( @@ -284,13 +293,13 @@ def duration( return _LazyCell( pl.duration( - weeks=_unwrap(weeks), - days=_unwrap(days), - hours=_unwrap(hours), - minutes=_unwrap(minutes), - seconds=_unwrap(seconds), - milliseconds=_unwrap(milliseconds), - microseconds=_unwrap(microseconds), + weeks=_to_polars_expression(weeks), + days=_to_polars_expression(days), + hours=_to_polars_expression(hours), + minutes=_to_polars_expression(minutes), + seconds=_to_polars_expression(seconds), + milliseconds=_to_polars_expression(milliseconds), + 
microseconds=_to_polars_expression(microseconds), ), ) @@ -353,10 +362,10 @@ def time( from ._lazy_cell import _LazyCell # circular import - pl_hour = _unwrap(hour) - pl_minute = _unwrap(minute) - pl_second = _unwrap(second) - pl_microsecond = _unwrap(microsecond) + pl_hour = _to_polars_expression(hour) + pl_minute = _to_polars_expression(minute) + pl_second = _to_polars_expression(second) + pl_microsecond = _to_polars_expression(microsecond) # By default, microseconds overflow into seconds return _LazyCell( @@ -388,7 +397,7 @@ def first_not_none(cells: list[Cell[P]]) -> Cell[P | None]: if not cells: return Cell.constant(None) - return _LazyCell(pl.coalesce([_unwrap(cell) for cell in cells])) + return _LazyCell(pl.coalesce([_to_polars_expression(cell) for cell in cells])) # ------------------------------------------------------------------------------------------------------------------ # Dunder methods @@ -518,45 +527,89 @@ def __str__(self) -> str: ... @property @abstractmethod - def str(self) -> StringOperations: + def dt(self) -> DatetimeOperations: """ - Namespace for operations on strings. + Namespace for operations on datetime/date/time values. Examples -------- + >>> from datetime import datetime >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["hi", "hello"]) - >>> column.transform(lambda cell: cell.str.length()) + >>> column = Column("a", [datetime(2025, 1, 1), datetime(2024, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2025 | + | 2024 | + +------+ + """ + + @property + @abstractmethod + def dur(self) -> DurationOperations: + """ + Namespace for operations on durations. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(hours=1), timedelta(hours=2)]) + >>> column.transform(lambda cell: cell.dur.full_hours()) +-----+ | a | | --- | - | u32 | + | i64 | +=====+ + | 1 | | 2 | - | 5 | +-----+ """ @property @abstractmethod - def dt(self) -> TemporalOperations: + def math(self) -> MathOperations: """ - Namespace for operations on temporal values. + Namespace for mathematical operations. Examples -------- - >>> import datetime + >>> from datetime import timedelta >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [datetime.datetime(2025, 1, 1), datetime.datetime(2024, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.year()) - +------+ - | a | - | --- | - | i32 | - +======+ - | 2025 | - | 2024 | - +------+ + >>> column = Column("a", [1, -2]) + >>> column.transform(lambda cell: cell.math.abs()) + +-----+ + | a | + | --- | + | i64 | + +=====+ + | 1 | + | 2 | + +-----+ + """ + + @property + @abstractmethod + def str(self) -> StringOperations: + """ + Namespace for operations on strings. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["hi", "hello"]) + >>> column.transform(lambda cell: cell.str.length()) + +-----+ + | a | + | --- | + | u32 | + +=====+ + | 2 | + | 5 | + +-----+ """ # ------------------------------------------------------------------------------------------------------------------ @@ -703,69 +756,6 @@ def xor(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: # Numeric operations # ------------------------------------------------------------------------------------------------------------------ - def abs(self) -> Cell: - """ - Get the absolute value. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1, -2, None]) - >>> column.transform(lambda cell: cell.abs()) - +------+ - | a | - | --- | - | i64 | - +======+ - | 1 | - | 2 | - | null | - +------+ - """ - return self.__abs__() - - def ceil(self) -> Cell: - """ - Round up to the nearest integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1.1, 3.0, None]) - >>> column.transform(lambda cell: cell.ceil()) - +---------+ - | a | - | --- | - | f64 | - +=========+ - | 2.00000 | - | 3.00000 | - | null | - +---------+ - """ - return self.__ceil__() - - def floor(self) -> Cell: - """ - Round down to the nearest integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1.1, 3.0, None]) - >>> column.transform(lambda cell: cell.floor()) - +---------+ - | a | - | --- | - | f64 | - +=========+ - | 1.00000 | - | 3.00000 | - | null | - +---------+ - """ - return self.__floor__() - def neg(self) -> Cell: """ Negate the value. This is equivalent to the unary `-` operator. @@ -1249,13 +1239,13 @@ def lt(self, other: _ConvertibleToCell) -> Cell[bool | None]: # ------------------------------------------------------------------------------------------------------------------ @abstractmethod - def cast(self, type_: ColumnType) -> Cell: + def cast(self, type: ColumnType) -> Cell: """ Cast the cell to a different type. Parameters ---------- - type_: + type: The type to cast to. 
Returns @@ -1298,7 +1288,7 @@ def _equals(self, other: object) -> bool: """ -def _unwrap(cell_proxy: _ConvertibleToCell) -> pl.Expr: +def _to_polars_expression(cell_proxy: _ConvertibleToCell) -> pl.Expr: import polars as pl if isinstance(cell_proxy, Cell): diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index 6f5190104..6055be763 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -4,13 +4,13 @@ from safeds._utils import _structural_hash -from ._cell import Cell, _unwrap +from ._cell import Cell, _to_polars_expression if TYPE_CHECKING: import polars as pl from safeds._typing import _ConvertibleToBooleanCell, _ConvertibleToCell - from safeds.data.tabular.query import StringOperations, TemporalOperations + from safeds.data.tabular.query import DatetimeOperations, DurationOperations, MathOperations, StringOperations from safeds.data.tabular.typing import ColumnType T = TypeVar("T") @@ -35,130 +35,130 @@ def __init__(self, expression: pl.Expr) -> None: def __invert__(self) -> Cell[bool | None]: import polars as pl - return _wrap(self._expression.cast(pl.Boolean).__invert__()) + return _LazyCell(self._expression.cast(pl.Boolean).__invert__()) def __and__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__and__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__and__(other)) def __rand__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__rand__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rand__(other)) def __or__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__or__(other)) + other = _to_polars_expression(other) + return 
_LazyCell(self._expression.__or__(other)) def __ror__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__ror__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__ror__(other)) def __xor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__xor__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__xor__(other)) def __rxor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__rxor__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rxor__(other)) # Comparison --------------------------------------------------------------- def __eq__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] - other = _unwrap(other) - return _wrap(self._expression.__eq__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__eq__(other)) def __ge__(self, other: _ConvertibleToCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__ge__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__ge__(other)) def __gt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__gt__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__gt__(other)) def __le__(self, other: _ConvertibleToCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__le__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__le__(other)) def __lt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: - other = _unwrap(other) - return _wrap(self._expression.__lt__(other)) + other = _to_polars_expression(other) + return 
_LazyCell(self._expression.__lt__(other)) def __ne__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] - other = _unwrap(other) - return _wrap(self._expression.__ne__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__ne__(other)) # Numeric operators -------------------------------------------------------- def __abs__(self) -> Cell: - return _wrap(self._expression.__abs__()) + return _LazyCell(self._expression.__abs__()) def __ceil__(self) -> Cell: - return _wrap(self._expression.ceil()) + return _LazyCell(self._expression.ceil()) def __floor__(self) -> Cell: - return _wrap(self._expression.floor()) + return _LazyCell(self._expression.floor()) def __neg__(self) -> Cell: - return _wrap(self._expression.__neg__()) + return _LazyCell(self._expression.__neg__()) def __pos__(self) -> Cell: - return _wrap(self._expression.__pos__()) + return _LazyCell(self._expression.__pos__()) def __add__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__add__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__add__(other)) def __radd__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__radd__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__radd__(other)) def __floordiv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__floordiv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__floordiv__(other)) def __rfloordiv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rfloordiv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rfloordiv__(other)) def __mod__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__mod__(other)) + other = 
_to_polars_expression(other) + return _LazyCell(self._expression.__mod__(other)) def __rmod__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rmod__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rmod__(other)) def __mul__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__mul__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__mul__(other)) def __rmul__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rmul__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rmul__(other)) def __pow__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__pow__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__pow__(other)) def __rpow__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rpow__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rpow__(other)) def __sub__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__sub__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__sub__(other)) def __rsub__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rsub__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rsub__(other)) def __truediv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__truediv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__truediv__(other)) def __rtruediv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rtruediv__(other)) + other = 
_to_polars_expression(other) + return _LazyCell(self._expression.__rtruediv__(other)) # Other -------------------------------------------------------------------- @@ -179,42 +179,54 @@ def __str__(self) -> str: # ------------------------------------------------------------------------------------------------------------------ @property - def str(self) -> StringOperations: - from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations # circular import + def dt(self) -> DatetimeOperations: + from safeds.data.tabular.query._lazy_datetime_operations import _LazyDatetimeOperations # circular import - return _LazyStringOperations(self._expression) + return _LazyDatetimeOperations(self._expression) + + @property + def dur(self) -> DurationOperations: + from safeds.data.tabular.query._lazy_duration_operations import _LazyDurationOperations # circular import + + return _LazyDurationOperations(self._expression) + + @property + def math(self) -> MathOperations: + from safeds.data.tabular.query._lazy_math_operations import _LazyMathOperations # circular import + + return _LazyMathOperations(self._expression) @property - def dt(self) -> TemporalOperations: - from safeds.data.tabular.query._lazy_temporal_operations import _LazyTemporalOperations # circular import + def str(self) -> StringOperations: + from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations # circular import - return _LazyTemporalOperations(self._expression) + return _LazyStringOperations(self._expression) # ------------------------------------------------------------------------------------------------------------------ # Comparison operations # ------------------------------------------------------------------------------------------------------------------ def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: - other = _unwrap(other) + other = _to_polars_expression(other) if propagate_missing_values: - return 
_wrap(self._expression.eq(other)) + return _LazyCell(self._expression.eq(other)) else: - return _wrap(self._expression.eq_missing(other)) + return _LazyCell(self._expression.eq_missing(other)) def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: - other = _unwrap(other) + other = _to_polars_expression(other) if propagate_missing_values: - return _wrap(self._expression.ne(other)) + return _LazyCell(self._expression.ne(other)) else: - return _wrap(self._expression.ne_missing(other)) + return _LazyCell(self._expression.ne_missing(other)) # ------------------------------------------------------------------------------------------------------------------ # Other # ------------------------------------------------------------------------------------------------------------------ - def cast(self, type_: ColumnType) -> Cell: - return _wrap(self._expression.cast(type_._polars_data_type)) + def cast(self, type: ColumnType) -> Cell: + return _LazyCell(self._expression.cast(type._polars_data_type)) # ------------------------------------------------------------------------------------------------------------------ # Internal @@ -230,7 +242,3 @@ def _equals(self, other: object) -> bool: if self is other: return True return self._expression.meta.eq(other._expression) - - -def _wrap(expression: pl.Expr) -> Cell: - return _LazyCell(expression) diff --git a/src/safeds/data/tabular/query/__init__.py b/src/safeds/data/tabular/query/__init__.py index 52200686f..d3671f51b 100644 --- a/src/safeds/data/tabular/query/__init__.py +++ b/src/safeds/data/tabular/query/__init__.py @@ -5,18 +5,24 @@ import apipkg if TYPE_CHECKING: + from ._datetime_operations import DatetimeOperations + from ._duration_operations import DurationOperations + from ._math_operations import MathOperations from ._string_operations import StringOperations - from ._temporal_operations import TemporalOperations apipkg.initpkg( __name__, { + "DatetimeOperations": 
"._datetime_operations:DatetimeOperations", + "DurationOperations": "._duration_operations:DurationOperations", + "MathOperations": "._math_operations:MathOperations", "StringOperations": "._string_operations:StringOperations", - "TemporalOperations": "._temporal_operations:TemporalOperations", }, ) __all__ = [ + "DatetimeOperations", + "DurationOperations", + "MathOperations", "StringOperations", - "TemporalOperations", ] diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py index 7cc40c878..0a746ad90 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py @@ -25,9 +25,8 @@ "None", ], ) -class TestShouldReturnAbsoluteValue: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: abs(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_absolute_value(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: abs(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.abs(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the `math` namespace. 
diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py index 32dce3d20..01e16721f 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py @@ -27,9 +27,8 @@ "None", ], ) -class TestShouldReturnCeiling: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_ceiling(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.ceil(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the `math` namespace. 
diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py index 73ecf8e85..e8afaebdc 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py @@ -27,9 +27,8 @@ "None", ], ) -class TestShouldReturnFloor: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: math.floor(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_floor(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.floor(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.floor(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the `math` namespace. 
diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/__init__.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/__init__.py b/tests/safeds/data/tabular/query/_lazy_math_operations/__init__.py new file mode 100644 index 000000000..e69de29bb From 7a791bd4989aba3727df3f0e4cef78201f42604c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 15:01:15 +0100 Subject: [PATCH 10/57] feat: test dunder methods of `_LazyDurationOperations` --- .../tabular/query/_duration_operations.py | 37 ++++++++ .../query/_lazy_duration_operations.py | 40 +++++++++ .../__snapshots__/test_hash.ambr | 7 ++ .../_lazy_duration_operations/test_eq.py | 90 +++++++++++++++++++ .../_lazy_duration_operations/test_hash.py | 64 +++++++++++++ .../_lazy_duration_operations/test_repr.py | 28 ++++++ .../_lazy_duration_operations/test_sizeof.py | 23 +++++ .../_lazy_duration_operations/test_str.py | 28 ++++++ 8 files changed, 317 insertions(+) create mode 100644 src/safeds/data/tabular/query/_duration_operations.py create mode 100644 src/safeds/data/tabular/query/_lazy_duration_operations.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py diff --git a/src/safeds/data/tabular/query/_duration_operations.py b/src/safeds/data/tabular/query/_duration_operations.py new file mode 100644 index 000000000..4285965dd --- /dev/null +++ 
b/src/safeds/data/tabular/query/_duration_operations.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Cell + + +# TODO: Examples with None + + +class DurationOperations(ABC): + """ + Namespace for operations on durations. + + This class cannot be instantiated directly. It can only be accessed using the `dur` attribute of a cell. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... 
diff --git a/src/safeds/data/tabular/query/_lazy_duration_operations.py b/src/safeds/data/tabular/query/_lazy_duration_operations.py new file mode 100644 index 000000000..2584d0493 --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_duration_operations.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from safeds._utils import _structural_hash +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + +if TYPE_CHECKING: + import polars as pl + + from safeds.data.tabular.containers import Cell + + +class _LazyDurationOperations(DurationOperations): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyDurationOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyDurationOperations({self._expression})" + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).dur" diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..99b9cb6b3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: 
TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[duration] + 2005674043565732975 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py new file mode 100644 index 000000000..c7810b321 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (duration) + ( + Cell.duration(hours=1).dur, + Cell.duration(hours=1).dur, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("a")).dur, + True, + ), + # not equal (different durations) + ( + Cell.duration(hours=1).dur, + Cell.duration(hours=2).dur, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("b")).dur, + False, + ), + # not equal (different cell kinds) + ( + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + False, + ), + ], + ids=[ + # Equal + "equal (duration)", + "equal (column)", + # Not equal + "not equal (different durations)", + "not equal (different columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: DurationOperations, + ops_2: DurationOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: DurationOperations) -> None: + assert 
(ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.duration(hours=1).dur, None), + (Cell.duration(hours=1).dur, Column("col1", [1])), + ], + ids=[ + "DurationOperations vs. None", + "DurationOperations vs. Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: DurationOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py new file mode 100644 index 000000000..baf9a9e6e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.duration(hours=1).dur, + lambda: _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], DurationOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], DurationOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different durations + ( + Cell.duration(hours=1).dur, + Cell.duration(hours=2).dur, + ), + # different columns + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("b")).dur, + ), + # different cell kinds + ( + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ), + ], + ids=[ + "different 
durations", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: DurationOperations, ops_2: DurationOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py new file mode 100644 index 000000000..215904a8d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.duration(hours=1).dur, + '_LazyDurationOperations(1h.alias("duration"))', + ), + ( + _LazyCell(pl.col("a")).dur, + '_LazyDurationOperations(col("a"))', + ), + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_a_string_representation(ops: DurationOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py new file mode 100644 index 000000000..512ef6a66 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: DurationOperations) -> None: + assert 
sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py new file mode 100644 index 000000000..81152e5db --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.duration(hours=1).dur, + '(1h.alias("duration")).dur', + ), + ( + _LazyCell(pl.col("a")).dur, + '(col("a")).dur', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(ops: DurationOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected From 401f368661148d0d17e282f274c26cd3ad918179 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 15:19:31 +0100 Subject: [PATCH 11/57] feat: test dunder methods of `_LazyMathOperations` --- .../tabular/query/_lazy_math_operations.py | 53 +++++++++ .../data/tabular/query/_math_operations.py | 101 ++++++++++++++++++ .../__snapshots__/test_hash.ambr | 7 ++ .../query/_lazy_math_operations/test_abs.py | 29 +++++ .../query/_lazy_math_operations/test_ceil.py | 29 +++++ .../query/_lazy_math_operations/test_eq.py | 90 ++++++++++++++++ .../query/_lazy_math_operations/test_floor.py | 29 +++++ .../query/_lazy_math_operations/test_hash.py | 64 +++++++++++ .../query/_lazy_math_operations/test_repr.py | 28 +++++ .../_lazy_math_operations/test_sizeof.py | 23 ++++ .../query/_lazy_math_operations/test_str.py | 28 +++++ 11 files changed, 481 insertions(+) create mode 100644 src/safeds/data/tabular/query/_lazy_math_operations.py create mode 100644 
src/safeds/data/tabular/query/_math_operations.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py create mode 100644 tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py diff --git a/src/safeds/data/tabular/query/_lazy_math_operations.py b/src/safeds/data/tabular/query/_lazy_math_operations.py new file mode 100644 index 000000000..b141fe143 --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_math_operations.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query._math_operations import MathOperations + +if TYPE_CHECKING: + import polars as pl + + from safeds.data.tabular.containers import Cell + + +class _LazyMathOperations(MathOperations): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyMathOperations): + return NotImplemented + if self is other: + return 
True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyMathOperations({self._expression})" + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).math" + + # ------------------------------------------------------------------------------------------------------------------ + # Math operations + # ------------------------------------------------------------------------------------------------------------------ + + def abs(self) -> Cell: + return _LazyCell(self._expression.__abs__()) + + def ceil(self) -> Cell: + return _LazyCell(self._expression.ceil()) + + def floor(self) -> Cell: + return _LazyCell(self._expression.floor()) diff --git a/src/safeds/data/tabular/query/_math_operations.py b/src/safeds/data/tabular/query/_math_operations.py new file mode 100644 index 000000000..7f7b208a7 --- /dev/null +++ b/src/safeds/data/tabular/query/_math_operations.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Cell + + +class MathOperations(ABC): + """ + Namespace for mathematical operations. + + This class cannot be instantiated directly. It can only be accessed using the `math` attribute of a cell. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... 
+ + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # Math operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def abs(self) -> Cell: + """ + Get the absolute value. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, -2, None]) + >>> column.transform(lambda cell: cell.math.abs()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | null | + +------+ + """ + + @abstractmethod + def ceil(self) -> Cell: + """ + Round up to the nearest integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1.1, 3.0, None]) + >>> column.transform(lambda cell: cell.math.ceil()) + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 2.00000 | + | 3.00000 | + | null | + +---------+ + """ + + @abstractmethod + def floor(self) -> Cell: + """ + Round down to the nearest integer. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1.1, 3.0, None]) + >>> column.transform(lambda cell: cell.math.floor()) + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 1.00000 | + | 3.00000 | + | null | + +---------+ + """ diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..87706531c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[constant] + 4610312201483200147 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py new file mode 100644 index 000000000..d7d119d26 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0.0), + (10, 10), + (10.5, 10.5), + (-10, 10), + (-10.5, 10.5), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_absolute_value(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.abs(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py new file mode 100644 index 
000000000..814c81d6a --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 11), + (-10, -10), + (-10.5, -10), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_ceiling(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.ceil(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py new file mode 100644 index 000000000..f75bbe902 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (constant) + ( + Cell.constant(1).math, + Cell.constant(1).math, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("a")).math, + True, + ), + # not equal (different constant) + ( + Cell.constant(1).math, + Cell.constant(2).math, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("b")).math, + False, + ), + # not equal (different cell kinds) + ( + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + False, + ), + ], + ids=[ + # Equal + "equal (constant)", + "equal (column)", + # Not equal + "not equal (different constants)", + "not equal (different 
columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: MathOperations, + ops_2: MathOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: MathOperations) -> None: + assert (ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.constant(1).math, None), + (Cell.constant(1).math, Column("col1", [1])), + ], + ids=[ + "MathOperations vs. None", + "MathOperations vs. Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: MathOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py new file mode 100644 index 000000000..d773fe90a --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 10), + (-10, -10), + (-10.5, -11), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_floor(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.floor(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py new file mode 100644 index 000000000..4e7f63514 --- /dev/null 
+++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.constant(1).math, + lambda: _LazyCell(pl.col("a")).math, + ], + ids=[ + "constant", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], MathOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], MathOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different constant values + ( + Cell.constant(1).math, + Cell.constant(2).math, + ), + # different columns + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("b")).math, + ), + # different cell kinds + ( + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ), + ], + ids=[ + "different constant values", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: MathOperations, ops_2: MathOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py new file mode 100644 index 000000000..c54ddcce4 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from 
safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.constant(1).math, + "_LazyMathOperations(dyn int: 1)", + ), + ( + _LazyCell(pl.col("a")).math, + '_LazyMathOperations(col("a"))', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(ops: MathOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py new file mode 100644 index 000000000..25f6c0a94 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: MathOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py new file mode 100644 index 000000000..68988573d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.constant(1).math, + "(dyn int: 1).math", + ), + ( + _LazyCell(pl.col("a")).math, + '(col("a")).math', + ), + ], + 
ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(ops: MathOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected From c69e088b0134d48ca6e7ff56dfef6c2956bd8321 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 15:28:58 +0100 Subject: [PATCH 12/57] feat: test dunder methods of `_LazyDatetimeOperations` --- .../__snapshots__/test_hash.ambr | 4 +- .../test_century.py | 0 .../test_date_to_string.py | 0 .../test_datetime_to_string.py | 0 .../test_day.py | 0 .../test_eq.py | 40 +++++++++---------- .../test_hash.py | 22 +++++----- .../test_month.py | 0 .../test_repr.py | 12 ++---- .../test_sizeof.py | 8 ++-- .../test_str.py | 10 +---- .../test_week.py | 0 .../test_weekday.py | 0 .../test_year.py | 0 14 files changed, 42 insertions(+), 54 deletions(-) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/__snapshots__/test_hash.ambr (83%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_century.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_date_to_string.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_datetime_to_string.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_day.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_eq.py (62%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_hash.py (70%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_month.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_repr.py (53%) rename 
tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_sizeof.py (66%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_str.py (60%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_week.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_weekday.py (100%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/test_year.py (100%) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr similarity index 83% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr index 99b9cb6b3..4ed1678cb 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/__snapshots__/test_hash.ambr +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr @@ -2,6 +2,6 @@ # name: TestContract.test_should_return_same_hash_in_different_processes[column] 8162512882156938440 # --- -# name: TestContract.test_should_return_same_hash_in_different_processes[duration] - 2005674043565732975 +# name: TestContract.test_should_return_same_hash_in_different_processes[time] + 1565184979992361175 # --- diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_century.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_century.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_date_to_string.py 
b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_date_to_string.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_datetime_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_datetime_to_string.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_day.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_day.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py similarity index 62% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py index 87bff8c16..f21dbb7a6 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_eq.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py @@ -5,16 +5,16 @@ from safeds.data.tabular.containers import Cell, Column from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query import DatetimeOperations @pytest.mark.parametrize( ("ops_1", "ops_2", "expected"), [ - # equal (duration) + # equal (time) ( - Cell.duration(hours=1).dt, - Cell.duration(hours=1).dt, + Cell.time(1, 0, 0).dt, + Cell.time(1, 0, 
0).dt, True, ), # equal (column) @@ -23,10 +23,10 @@ _LazyCell(pl.col("a")).dt, True, ), - # not equal (different durations) + # not equal (different times) ( - Cell.duration(hours=1).dt, - Cell.duration(hours=2).dt, + Cell.time(1, 0, 0).dt, + Cell.time(2, 0, 0).dt, False, ), # not equal (different columns) @@ -37,24 +37,24 @@ ), # not equal (different cell kinds) ( - Cell.duration(hours=1).dt, + Cell.time(1, 0, 0).dt, _LazyCell(pl.col("a")).dt, False, ), ], ids=[ # Equal - "equal (duration)", + "equal (time)", "equal (column)", # Not equal - "not equal (different durations)", + "not equal (different times)", "not equal (different columns)", "not equal (different cell kinds)", ], ) def test_should_return_whether_objects_are_equal( - ops_1: TemporalOperations, - ops_2: TemporalOperations, + ops_1: DatetimeOperations, + ops_2: DatetimeOperations, expected: bool, ) -> None: assert (ops_1.__eq__(ops_2)) == expected @@ -63,28 +63,28 @@ def test_should_return_whether_objects_are_equal( @pytest.mark.parametrize( "ops", [ - Cell.duration(hours=1).dt, + Cell.time(1, 0, 0).dt, _LazyCell(pl.col("a")).dt, ], ids=[ - "duration", + "time", "column", ], ) -def test_should_return_true_if_objects_are_identical(ops: TemporalOperations) -> None: +def test_should_return_true_if_objects_are_identical(ops: DatetimeOperations) -> None: assert (ops.__eq__(ops)) is True @pytest.mark.parametrize( ("ops", "other"), [ - (Cell.duration(hours=1).dt, None), - (Cell.duration(hours=1).dt, Column("col1", [1])), + (Cell.time(1, 0, 0).dt, None), + (Cell.time(1, 0, 0).dt, Column("col1", [1])), ], ids=[ - "TemporalOperations vs. None", - "TemporalOperations vs. Column", + "DatetimeOperations vs. None", + "DatetimeOperations vs. 
Column", ], ) -def test_should_return_not_implemented_if_other_has_different_type(ops: TemporalOperations, other: Any) -> None: +def test_should_return_not_implemented_if_other_has_different_type(ops: DatetimeOperations, other: Any) -> None: assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py similarity index 70% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py index 78323a03e..7af6e3776 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_hash.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py @@ -6,29 +6,29 @@ from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query import DatetimeOperations @pytest.mark.parametrize( "ops_factory", [ - lambda: Cell.duration(hours=1).dt, + lambda: Cell.time(1, 0, 0).dt, lambda: _LazyCell(pl.col("a")).dt, ], ids=[ - "duration", + "time", "column", ], ) class TestContract: - def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], TemporalOperations]) -> None: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], DatetimeOperations]) -> None: ops_1 = ops_factory() ops_2 = ops_factory() assert hash(ops_1) == hash(ops_2) def test_should_return_same_hash_in_different_processes( self, - ops_factory: Callable[[], TemporalOperations], + ops_factory: Callable[[], DatetimeOperations], snapshot: SnapshotAssertion, ) -> None: ops = ops_factory() @@ -38,10 +38,10 @@ def test_should_return_same_hash_in_different_processes( @pytest.mark.parametrize( ("ops_1", "ops_2"), [ - # different durations + # different times ( - 
Cell.duration(hours=1).dt, - Cell.duration(hours=2).dt, + Cell.time(1, 0, 0).dt, + Cell.time(2, 0, 0).dt, ), # different columns ( @@ -50,15 +50,15 @@ def test_should_return_same_hash_in_different_processes( ), # different cell kinds ( - Cell.duration(hours=1).dt, + Cell.time(1, 0, 0).dt, _LazyCell(pl.col("a")).dt, ), ], ids=[ - "different durations", + "different times", "different columns", "different cell kinds", ], ) -def test_should_be_good_hash(ops_1: TemporalOperations, ops_2: TemporalOperations) -> None: +def test_should_be_good_hash(ops_1: DatetimeOperations, ops_2: DatetimeOperations) -> None: assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_month.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py similarity index 53% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py index 1b78568f2..238eaa189 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_repr.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py @@ -1,28 +1,22 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query import DatetimeOperations @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.duration(hours=1).dt, - '_LazyTemporalOperations(1h.alias("duration"))', - ), ( _LazyCell(pl.col("a")).dt, - 
'_LazyTemporalOperations(col("a"))', + '_LazyDatetimeOperations(col("a"))', ), ], ids=[ - "duration", "column", ], ) -def test_should_return_a_string_representation(ops: TemporalOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: DatetimeOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py similarity index 66% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py index 9907cf70a..d16bc1cf2 100644 --- a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_sizeof.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py @@ -5,19 +5,19 @@ from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query import DatetimeOperations @pytest.mark.parametrize( "ops", [ - Cell.duration(hours=1).dt, + Cell.time(1, 0, 0).dt, _LazyCell(pl.col("a")).dt, ], ids=[ - "duration", + "time", "column", ], ) -def test_should_be_larger_than_normal_object(ops: TemporalOperations) -> None: +def test_should_be_larger_than_normal_object(ops: DatetimeOperations) -> None: assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py similarity index 60% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py index 3fa78713c..8cc48623f 100644 --- 
a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_str.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py @@ -1,28 +1,22 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell -from safeds.data.tabular.query import TemporalOperations +from safeds.data.tabular.query import DatetimeOperations @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.duration(hours=1).dt, - '(1h.alias("duration")).dt', - ), ( _LazyCell(pl.col("a")).dt, '(col("a")).dt', ), ], ids=[ - "constant", "column", ], ) -def test_should_return_a_string_representation(ops: TemporalOperations, expected: str) -> None: +def test_should_return_a_string_representation(ops: DatetimeOperations, expected: str) -> None: # We do not care about the exact string representation, this is only for debugging assert str(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_week.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_weekday.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_weekday.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/test_year.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py From 
269cd21b9e634a065787b3facbfd55f6e0a9e49b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 15:29:14 +0100 Subject: [PATCH 13/57] test: remove some unneeded tests --- .../tabular/query/_lazy_duration_operations/test_repr.py | 6 ------ .../tabular/query/_lazy_duration_operations/test_str.py | 6 ------ .../data/tabular/query/_lazy_math_operations/test_repr.py | 6 ------ .../data/tabular/query/_lazy_math_operations/test_str.py | 6 ------ 4 files changed, 24 deletions(-) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py index 215904a8d..ec3bbb518 100644 --- a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py @@ -1,7 +1,6 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import DurationOperations @@ -9,17 +8,12 @@ @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.duration(hours=1).dur, - '_LazyDurationOperations(1h.alias("duration"))', - ), ( _LazyCell(pl.col("a")).dur, '_LazyDurationOperations(col("a"))', ), ], ids=[ - "duration", "column", ], ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py index 81152e5db..0cb235d11 100644 --- a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py @@ -1,7 +1,6 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import DurationOperations @@ -9,17 +8,12 @@ @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.duration(hours=1).dur, - 
'(1h.alias("duration")).dur', - ), ( _LazyCell(pl.col("a")).dur, '(col("a")).dur', ), ], ids=[ - "constant", "column", ], ) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py index c54ddcce4..b3657bc63 100644 --- a/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py @@ -1,7 +1,6 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import MathOperations @@ -9,17 +8,12 @@ @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.constant(1).math, - "_LazyMathOperations(dyn int: 1)", - ), ( _LazyCell(pl.col("a")).math, '_LazyMathOperations(col("a"))', ), ], ids=[ - "constant", "column", ], ) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py index 68988573d..6e40ae607 100644 --- a/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py @@ -1,7 +1,6 @@ import polars as pl import pytest -from safeds.data.tabular.containers import Cell from safeds.data.tabular.containers._lazy_cell import _LazyCell from safeds.data.tabular.query import MathOperations @@ -9,17 +8,12 @@ @pytest.mark.parametrize( ("ops", "expected"), [ - ( - Cell.constant(1).math, - "(dyn int: 1).math", - ), ( _LazyCell(pl.col("a")).math, '(col("a")).math', ), ], ids=[ - "constant", "column", ], ) From 532451944bffbcff5efc884a985cde5636cacf74 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 16:16:42 +0100 Subject: [PATCH 14/57] docs: include summary --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 343a4d52c..527e9e50c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml 
@@ -84,6 +84,7 @@ plugins: show_signature: false show_symbol_type_heading: true show_symbol_type_toc: true + summary: true - gen-files: scripts: - docs/reference/generate_reference_pages.py From c5bb2af92b4a94d4a2b7a78484626251a327d00a Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 16:17:18 +0100 Subject: [PATCH 15/57] docs: use list instead of table (more readable in IDE) --- src/safeds/data/tabular/containers/_table.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 9cb1b8fb1..c2310dabd 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -65,13 +65,11 @@ class Table: To create a `Table` call the constructor or use one of the following static methods: - | Method | Description | - | ---------------------------------------------------------------------------------- | -------------------------------------- | - | [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] | Create a table from a CSV file. | - | [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] | Create a table from a JSON file. | - | [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] | Create a table from a Parquet file. | - | [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. | - | [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. | + - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file]: Create a table from a CSV file. + - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file]: Create a table from a JSON file. + - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file]: Create a table from a Parquet file. 
+ - [from_columns][safeds.data.tabular.containers._table.Table.from_columns]: Create a table from a list of columns. + - [from_dict][safeds.data.tabular.containers._table.Table.from_dict]: Create a table from a dictionary. Parameters ---------- From dc9ce74418bcf7d3b065b3478340ada5f3982974 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 16:18:05 +0100 Subject: [PATCH 16/57] docs: document parameters and results of cell, so they show up in the generated documentation --- src/safeds/data/tabular/containers/_cell.py | 178 ++++++++++++++++++-- 1 file changed, 167 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 3326acc4e..b71da80de 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -28,13 +28,13 @@ class Cell(ABC, Generic[T_co]): You only need to interact with this class in callbacks passed to higher-order functions. Most operations are grouped into namespaces, which are accessed through the following attributes: - -`dt` (operations on datetime/date/time values) - -`dur` (operations on durations) - -`math` (mathematical operations on numbers) - -`str` (operations on strings) + - `dt`: Operations on datetime/date/time values + - `dur`: Operations on durations + - `math`: Mathematical operations on numbers + - `str`: Operations on strings - This class only has methods that are not specific to a data type (e.g. `cast`), and methods with corresponding - operators (e.g. `add` for `+`). + This class only has methods that are not specific to a data type (e.g. `cast`), methods with corresponding + operators (e.g. `add` for `+`), and static methods to create new cells. """ # ------------------------------------------------------------------------------------------------------------------ @@ -618,11 +618,16 @@ def str(self) -> StringOperations: def not_(self) -> Cell[bool | None]: """ - Negate a boolean. 
This is equivalent to the `~` operator. + Negate a Boolean. This is equivalent to the `~` operator. Do **not** use the `not` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Returns + ------- + cell: + The result of the Boolean negation. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -653,11 +658,21 @@ def not_(self) -> Cell[bool | None]: def and_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean AND operation. This is equivalent to the `&` operator. + Perform a Boolean AND operation. This is equivalent to the `&` operator. Do **not** use the `and` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the conjunction. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -688,10 +703,20 @@ def and_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: def or_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean OR operation. This is equivalent to the `|` operator. + Perform a Boolean OR operation. This is equivalent to the `|` operator. Do **not** use the `or` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the disjunction. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -722,7 +747,17 @@ def or_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: def xor(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean XOR operation. This is equivalent to the `^` operator. + Perform a Boolean XOR operation. This is equivalent to the `^` operator. + + Parameters + ---------- + other: + The right operand. 
+ + Returns + ------- + cell: + The result of the exclusive or. Examples -------- @@ -760,6 +795,11 @@ def neg(self) -> Cell: """ Negate the value. This is equivalent to the unary `-` operator. + Returns + ------- + cell: + The negated value. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -792,6 +832,16 @@ def add(self, other: _ConvertibleToCell) -> Cell: """ Add a value. This is equivalent to the `+` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the addition. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -824,6 +874,16 @@ def div(self, other: _ConvertibleToCell) -> Cell: """ Divide by a value. This is equivalent to the `/` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the division. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -856,6 +916,16 @@ def mod(self, other: _ConvertibleToCell) -> Cell: """ Perform a modulo operation. This is equivalent to the `%` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the modulo operation. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -890,6 +960,16 @@ def mul(self, other: _ConvertibleToCell) -> Cell: """ Multiply by a value. This is equivalent to the `*` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the multiplication. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -922,6 +1002,16 @@ def pow(self, other: _ConvertibleToCell) -> Cell: """ Raise to a power. This is equivalent to the `**` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the exponentiation. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -937,7 +1027,6 @@ def pow(self, other: _ConvertibleToCell) -> Cell: | null | +------+ - >>> column.transform(lambda cell: cell ** 3) +------+ | a | @@ -955,6 +1044,16 @@ def sub(self, other: _ConvertibleToCell) -> Cell: """ Subtract a value. This is equivalent to the binary `-` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the subtraction. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1005,6 +1104,18 @@ def eq( - If `propagate_missing_values` is `False`, `None` will be treated as a regular value. Here, `None == None` is `True`. This behavior is useful, if you want to work with missing values, e.g. to filter them out. + Parameters + ---------- + other: + The value to compare to. + propagate_missing_values: + Whether to propagate missing values. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1068,6 +1179,11 @@ def neq( propagate_missing_values: Whether to propagate missing values. + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1110,6 +1226,16 @@ def ge(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than or equal to a value. This is equivalent to the `>=` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1142,6 +1268,16 @@ def gt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than a value. This is equivalent to the `>` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1174,6 +1310,16 @@ def le(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than or equal to a value. This is equivalent to the `<=` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1206,6 +1352,16 @@ def lt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than a value. This is equivalent to the `<` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column From de4026d1f901fbe93b3eb4e433c73c2154997d12 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 16:33:43 +0100 Subject: [PATCH 17/57] docs: use raw text for method name for consistency --- src/safeds/data/tabular/containers/_table.py | 166 +++++++++---------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index c2310dabd..89d7d7dcf 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -65,11 +65,11 @@ class Table: To create a `Table` call the constructor or use one of the following static methods: - - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file]: Create a table from a CSV file. - - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file]: Create a table from a JSON file. - - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file]: Create a table from a Parquet file. - - [from_columns][safeds.data.tabular.containers._table.Table.from_columns]: Create a table from a list of columns. 
- - [from_dict][safeds.data.tabular.containers._table.Table.from_dict]: Create a table from a dictionary. + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file]: Create a table from a CSV file. + - [`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file]: Create a table from a JSON file. + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file]: Create a table from a Parquet file. + - [`from_columns`][safeds.data.tabular.containers._table.Table.from_columns]: Create a table from a list of columns. + - [`from_dict`][safeds.data.tabular.containers._table.Table.from_dict]: Create a table from a dictionary. Parameters ---------- @@ -195,8 +195,8 @@ def from_csv_file(path: str | Path, *, separator: str = ",") -> Table: Related ------- - - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] - - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] + - [`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file] + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file] """ import polars as pl @@ -279,8 +279,8 @@ def from_json_file(path: str | Path) -> Table: Related ------- - - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] - - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file] + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file] """ import polars as pl @@ -326,8 +326,8 @@ def from_parquet_file(path: str | Path) -> Table: Related ------- - - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] - - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file] + - 
[`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file] """ import polars as pl @@ -528,9 +528,9 @@ def add_columns( Related ------- - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a column with values computed from other columns. - - [add_index_column][safeds.data.tabular.containers._table.Table.add_index_column] + - [`add_index_column`][safeds.data.tabular.containers._table.Table.add_index_column] """ from polars.exceptions import DuplicateError, ShapeError @@ -598,10 +598,10 @@ def add_computed_column( Related ------- - - [add_columns][safeds.data.tabular.containers._table.Table.add_columns]: + - [`add_columns`][safeds.data.tabular.containers._table.Table.add_columns]: Add column objects to the table. - - [add_index_column][safeds.data.tabular.containers._table.Table.add_index_column] - - [transform_columns][safeds.data.tabular.containers._table.Table.transform_columns]: + - [`add_index_column`][safeds.data.tabular.containers._table.Table.add_index_column] + - [`transform_columns`][safeds.data.tabular.containers._table.Table.transform_columns]: Transform existing columns with a custom function. """ _check_columns_dont_exist(self, name) @@ -669,9 +669,9 @@ def add_index_column(self, name: str, *, first_index: int = 0) -> Table: Related ------- - - [add_columns][safeds.data.tabular.containers._table.Table.add_columns]: + - [`add_columns`][safeds.data.tabular.containers._table.Table.add_columns]: Add column objects to the table. - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a column with values computed from other columns. 
""" _check_columns_dont_exist(self, name) @@ -835,10 +835,10 @@ def remove_columns( Related ------- - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ if isinstance(selector, str): selector = [selector] @@ -899,16 +899,16 @@ def remove_columns_with_missing_values( Related ------- - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [SimpleImputer][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`SimpleImputer`][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: Replace missing values with a constant value or a statistic of the column. - - [KNearestNeighborsImputer][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: + - [`KNearestNeighborsImputer`][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: Replace missing values with a value computed from the nearest neighbors. - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. 
- - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - [`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. - - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ import polars as pl @@ -959,11 +959,11 @@ def remove_non_numeric_columns(self) -> Table: Related ------- - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. - - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - [`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] """ import polars.selectors as cs @@ -1157,10 +1157,10 @@ def select_columns( Related ------- - - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - [`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. 
- - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ _check_columns_exist(self, selector) @@ -1235,9 +1235,9 @@ def transform_columns( Related ------- - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a new column that is computed from other columns. - - [transform_table][safeds.data.tabular.containers._table.Table.transform_table]: + - [`transform_table`][safeds.data.tabular.containers._table.Table.transform_table]: Transform the entire table with a fitted transformer. """ import polars as pl @@ -1375,11 +1375,11 @@ def filter_rows( Related ------- - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. 
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ mask = predicate(_LazyVectorizedRow(self)) @@ -1429,11 +1429,11 @@ def filter_rows_by_column( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ _check_columns_exist(self, name) @@ -1472,12 +1472,12 @@ def remove_duplicate_rows(self) -> Table: Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. 
- - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ return Table._from_polars_lazy_frame( self._lazy_frame.unique(maintain_order=True), @@ -1518,15 +1518,15 @@ def remove_rows( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`remove_rows_by_column`][safeds.data.tabular.containers._table.Table.remove_rows_by_column]: Remove rows that satisfy a condition on a specific column.
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ mask = predicate(_LazyVectorizedRow(self)) @@ -1577,15 +1577,15 @@ def remove_rows_by_column( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]: + - [`remove_rows`][safeds.data.tabular.containers._table.Table.remove_rows]: Remove rows that satisfy a condition. 
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ _check_columns_exist(self, name) @@ -1646,17 +1646,17 @@ def remove_rows_with_missing_values( Related ------- - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [SimpleImputer][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`SimpleImputer`][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: Replace missing values with a constant value or a statistic of the column. - - [KNearestNeighborsImputer][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: + - [`KNearestNeighborsImputer`][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: Replace missing values with a value computed from the nearest neighbors. - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. 
- - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ if isinstance(selector, list) and not selector: # polars panics in this case @@ -1731,12 +1731,12 @@ def remove_rows_with_outliers( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] """ _check_bounds( "z_score_threshold", @@ -1905,7 +1905,7 @@ def sort_rows( Related ------- - - [sort_rows_by_column][safeds.data.tabular.containers._table.Table.sort_rows_by_column]: + - [`sort_rows_by_column`][safeds.data.tabular.containers._table.Table.sort_rows_by_column]: Sort the rows by a specific column. 
""" key = key_selector(_LazyVectorizedRow(self)) @@ -1963,7 +1963,7 @@ def sort_rows_by_column( Related ------- - - [sort_rows][safeds.data.tabular.containers._table.Table.sort_rows]: + - [`sort_rows`][safeds.data.tabular.containers._table.Table.sort_rows]: Sort the rows by a value computed from an entire row. """ _check_columns_exist(self, name) @@ -2101,7 +2101,7 @@ def add_tables_as_columns(self, others: Table | list[Table]) -> Table: Related ------- - - [add_tables_as_rows][safeds.data.tabular.containers._table.Table.add_tables_as_rows] + - [`add_tables_as_rows`][safeds.data.tabular.containers._table.Table.add_tables_as_rows] """ import polars as pl @@ -2158,7 +2158,7 @@ def add_tables_as_rows(self, others: Table | list[Table]) -> Table: Related ------- - - [add_tables_as_columns][safeds.data.tabular.containers._table.Table.add_tables_as_columns] + - [`add_tables_as_columns`][safeds.data.tabular.containers._table.Table.add_tables_as_columns] """ import polars as pl @@ -2221,7 +2221,7 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer Related ------- - - [transform_table][safeds.data.tabular.containers._table.Table.transform_table]: + - [`transform_table`][safeds.data.tabular.containers._table.Table.transform_table]: Transform the table with a fitted transformer. """ return fitted_transformer.inverse_transform(self) @@ -2417,9 +2417,9 @@ def transform_table(self, fitted_transformer: TableTransformer) -> Table: Related ------- - - [inverse_transform_table][safeds.data.tabular.containers._table.Table.inverse_transform_table]: + - [`inverse_transform_table`][safeds.data.tabular.containers._table.Table.inverse_transform_table]: Inverse-transform the table with a fitted, invertible transformer. - - [transform_columns][safeds.data.tabular.containers._table.Table.transform_columns]: + - [`transform_columns`][safeds.data.tabular.containers._table.Table.transform_columns]: Transform columns with a custom function. 
""" return fitted_transformer.transform(self) @@ -2575,8 +2575,8 @@ def to_csv_file(self, path: str | Path) -> None: Related ------- - - [to_json_file][safeds.data.tabular.containers._table.Table.to_json_file] - - [to_parquet_file][safeds.data.tabular.containers._table.Table.to_parquet_file] + - [`to_json_file`][safeds.data.tabular.containers._table.Table.to_json_file] + - [`to_parquet_file`][safeds.data.tabular.containers._table.Table.to_parquet_file] """ path = _normalize_and_check_file_path(path, ".csv", [".csv"]) path.parent.mkdir(parents=True, exist_ok=True) @@ -2631,8 +2631,8 @@ def to_json_file( Related ------- - - [to_csv_file][safeds.data.tabular.containers._table.Table.to_csv_file] - - [to_parquet_file][safeds.data.tabular.containers._table.Table.to_parquet_file] + - [`to_csv_file`][safeds.data.tabular.containers._table.Table.to_csv_file] + - [`to_parquet_file`][safeds.data.tabular.containers._table.Table.to_parquet_file] """ path = _normalize_and_check_file_path(path, ".json", [".json"]) path.parent.mkdir(parents=True, exist_ok=True) @@ -2665,8 +2665,8 @@ def to_parquet_file(self, path: str | Path) -> None: Related ------- - - [to_csv_file][safeds.data.tabular.containers._table.Table.to_csv_file] - - [to_json_file][safeds.data.tabular.containers._table.Table.to_json_file] + - [`to_csv_file`][safeds.data.tabular.containers._table.Table.to_csv_file] + - [`to_json_file`][safeds.data.tabular.containers._table.Table.to_json_file] """ path = _normalize_and_check_file_path(path, ".parquet", [".parquet"]) path.parent.mkdir(parents=True, exist_ok=True) From 4ad2e4551564febd78071b7d017a7811a8d927e4 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 17:44:50 +0100 Subject: [PATCH 18/57] feat: duration operations --- .../tabular/query/_duration_operations.py | 253 ++++++++++++++++++ .../query/_lazy_duration_operations.py | 40 +++ tests/helpers/_assertions.py | 2 +- .../_lazy_duration_operations/test_abs.py | 31 +++ .../test_full_days.py | 29 ++ 
.../test_full_hours.py | 29 ++ .../test_full_microseconds.py | 30 +++ .../test_full_milliseconds.py | 34 +++ .../test_full_minutes.py | 34 +++ .../test_full_seconds.py | 34 +++ .../test_full_weeks.py | 29 ++ .../test_to_string.py | 127 +++++++++ 12 files changed, 671 insertions(+), 1 deletion(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py create mode 100644 tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py diff --git a/src/safeds/data/tabular/query/_duration_operations.py b/src/safeds/data/tabular/query/_duration_operations.py index 4285965dd..30d032003 100644 --- a/src/safeds/data/tabular/query/_duration_operations.py +++ b/src/safeds/data/tabular/query/_duration_operations.py @@ -35,3 +35,256 @@ def __sizeof__(self) -> int: ... @abstractmethod def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # Duration operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def abs(self) -> Cell[None]: + """ + Get the absolute value of the duration. + + Returns + ------- + cell: + The absolute value. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=-1), timedelta(days=1), None]) + >>> column.transform(lambda cell: cell.dur.abs()) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1d | + | 1d | + | null | + +--------------+ + """ + + @abstractmethod + def full_weeks(self) -> Cell[int | None]: + """ + Get the number of full weeks in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full weeks. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=8), timedelta(days=6), None]) + >>> column.transform(lambda cell: cell.dur.full_weeks()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_days(self) -> Cell[int | None]: + """ + Get the number of full days in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full days. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(hours=25), timedelta(hours=23), None]) + >>> column.transform(lambda cell: cell.dur.full_days()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_hours(self) -> Cell[int | None]: + """ + Get the number of full hours in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full hours. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(minutes=61), timedelta(minutes=59), None]) + >>> column.transform(lambda cell: cell.dur.full_hours()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_minutes(self) -> Cell[int | None]: + """ + Get the number of full minutes in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full minutes. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(seconds=61), timedelta(seconds=59), None]) + >>> column.transform(lambda cell: cell.dur.full_minutes()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_seconds(self) -> Cell[int | None]: + """ + Get the number of full seconds in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full seconds. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(milliseconds=1001), timedelta(milliseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_seconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_milliseconds(self) -> Cell[int | None]: + """ + Get the number of full milliseconds in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full milliseconds. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(microseconds=1001), timedelta(microseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_milliseconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_microseconds(self) -> Cell[int | None]: + """ + Get the number of full microseconds in the duration. The result is rounded toward zero. + + Since durations only have microsecond resolution at the moment, the rounding has no effect. This may change in + the future. + + Returns + ------- + cell: + The number of full microseconds. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(microseconds=1001), timedelta(microseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_microseconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1001 | + | 999 | + | null | + +------+ + """ + + @abstractmethod + def to_string( + self, + *, + format: Literal["iso", "pretty"] = "iso", + ) -> Cell[str | None]: + """ + Convert the duration to a string. + + The following formats are supported: + + - `"iso"`: The duration is represented in the ISO 8601 format. This is the default. + - `"pretty"`: The duration is represented in a human-readable format. + + !!! warning "API Stability" + + Do not rely on the exact output of the `"pretty"` format. In future versions, we may change it without prior + notice. + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The string representation. 
+ """ diff --git a/src/safeds/data/tabular/query/_lazy_duration_operations.py b/src/safeds/data/tabular/query/_lazy_duration_operations.py index 2584d0493..62f10b66f 100644 --- a/src/safeds/data/tabular/query/_lazy_duration_operations.py +++ b/src/safeds/data/tabular/query/_lazy_duration_operations.py @@ -38,3 +38,43 @@ def __sizeof__(self) -> int: def __str__(self) -> str: return f"({self._expression}).dur" + + # ------------------------------------------------------------------------------------------------------------------ + # Duration operations + # ------------------------------------------------------------------------------------------------------------------ + + def abs(self) -> Cell[None]: + return _LazyCell(self._expression.abs()) + + def full_weeks(self) -> Cell[int | None]: + import polars as pl + + # We must round towards zero + return _LazyCell((self._expression.dt.total_days() / 7).cast(pl.Int64())) + + def full_days(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_days()) + + def full_hours(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_hours()) + + def full_minutes(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_minutes()) + + def full_seconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_seconds()) + + def full_milliseconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_milliseconds()) + + def full_microseconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_microseconds()) + + def to_string( + self, + *, + format: Literal["iso", "pretty"] = "iso", + ) -> Cell[str | None]: + polars_format = "iso" if format == "iso" else "polars" + + return _LazyCell(self._expression.dt.to_string(polars_format)) diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 773abb22c..5846c28f9 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -84,7 +84,7 @@ def 
assert_cell_operation_works( The type of the column if the value is `None`. """ type_ = type_if_none if value is None else None - column = Column("A", [value], type=type_) + column = Column("a", [value], type=type_) transformed_column = column.transform(transformer) actual = transformed_column[0] assert actual == expected, f"Expected {expected}, but got {actual}." diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py new file mode 100644 index 000000000..7c1f5b023 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py @@ -0,0 +1,31 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(days=1), timedelta(days=1)), + (timedelta(days=1, hours=12), timedelta(days=1, hours=12)), + (timedelta(days=-1), timedelta(days=1)), + (timedelta(days=-1, hours=-12), timedelta(days=1, hours=12)), + (timedelta(days=1, hours=-12), timedelta(hours=12)), + (timedelta(days=-1, hours=12), timedelta(hours=12)), + (None, None), + ], + ids=[ + "positive days", + "positive days and hours", + "negative days", + "negative days and hours", + "positive days, negative hours", + "negative days, positive hours", + "None", + ], +) +def test_should_return_absolute_duration(value: timedelta | None, expected: timedelta | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.abs(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py new file mode 100644 index 000000000..facdf0214 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py @@ -0,0 +1,29 @@ +from datetime import 
timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(days=1), 1), + (timedelta(days=1, hours=12), 1), + (timedelta(days=-1), -1), + (timedelta(days=-1, hours=-12), -1), + (timedelta(days=1, hours=-12), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_days(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_days(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py new file mode 100644 index 000000000..dc01ec4b8 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(hours=1), 1), + (timedelta(hours=1, minutes=30), 1), + (timedelta(hours=-1), -1), + (timedelta(hours=-1, minutes=-30), -1), + (timedelta(hours=1, minutes=-30), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_hours(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_hours(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py new file mode 100644 
index 000000000..e1bcd6271 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py @@ -0,0 +1,30 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(microseconds=1), 1), + (timedelta(microseconds=-1), -1), + (timedelta(milliseconds=1, microseconds=-500), 500), + (None, None), + ], + ids=[ + "positive, exact", + "negative, exact", + "mixed", + "None", + ], +) +def test_should_return_full_microseconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_microseconds(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py new file mode 100644 index 000000000..5bcd0d58e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py @@ -0,0 +1,34 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(milliseconds=1), 1), + (timedelta(milliseconds=1, microseconds=500), 1), + (timedelta(milliseconds=-1), -1), + (timedelta(milliseconds=-1, microseconds=-500), -1), + (timedelta(milliseconds=1, microseconds=-500), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_milliseconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_milliseconds(), + expected, + 
type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py new file mode 100644 index 000000000..21e962307 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py @@ -0,0 +1,34 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(minutes=1), 1), + (timedelta(minutes=1, seconds=30), 1), + (timedelta(minutes=-1), -1), + (timedelta(minutes=-1, seconds=-30), -1), + (timedelta(minutes=1, seconds=-30), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_minutes(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_minutes(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py new file mode 100644 index 000000000..7d1611a59 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py @@ -0,0 +1,34 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(seconds=1), 1), + (timedelta(seconds=1, milliseconds=500), 1), + (timedelta(seconds=-1), -1), + (timedelta(seconds=-1, milliseconds=-500), -1), + (timedelta(seconds=1, milliseconds=-500), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, 
exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_seconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_seconds(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py new file mode 100644 index 000000000..f2f2a3ebd --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(weeks=1), 1), + (timedelta(weeks=1, days=3), 1), + (timedelta(weeks=-1), -1), + (timedelta(weeks=-1, days=-3), -1), + (timedelta(weeks=1, days=-3), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_weeks(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_weeks(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py new file mode 100644 index 000000000..f6ac84a1d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py @@ -0,0 +1,127 @@ +from datetime import timedelta +from typing import Literal + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + # ISO 8601 format + (timedelta(weeks=1), "iso", "P7D"), 
+ (timedelta(weeks=-1), "iso", "-P7D"), + (timedelta(days=1), "iso", "P1D"), + (timedelta(days=-1), "iso", "-P1D"), + (timedelta(hours=1), "iso", "PT1H"), + (timedelta(hours=-1), "iso", "-PT1H"), + (timedelta(minutes=1), "iso", "PT1M"), + (timedelta(minutes=-1), "iso", "-PT1M"), + (timedelta(seconds=1), "iso", "PT1S"), + (timedelta(seconds=-1), "iso", "-PT1S"), + (timedelta(milliseconds=1), "iso", "PT0.001S"), + (timedelta(milliseconds=-1), "iso", "-PT0.001S"), + (timedelta(microseconds=1), "iso", "PT0.000001S"), + (timedelta(microseconds=-1), "iso", "-PT0.000001S"), + ( + timedelta(weeks=1, days=1, hours=1, minutes=1, seconds=1, milliseconds=1, microseconds=1), + "iso", + "P8DT1H1M1.001001S", + ), + ( + timedelta(weeks=-1, days=-1, hours=-1, minutes=-1, seconds=-1, milliseconds=-1, microseconds=-1), + "iso", + "-P8DT1H1M1.001001S", + ), + ( + timedelta(weeks=1, days=-1, hours=1, minutes=-1, seconds=1, milliseconds=-1, microseconds=1), + "iso", + "P6DT59M0.999001S", + ), + (None, "iso", None), + # Pretty format + (timedelta(weeks=1), "pretty", "7d"), + (timedelta(weeks=-1), "pretty", "-7d"), + (timedelta(days=1), "pretty", "1d"), + (timedelta(days=-1), "pretty", "-1d"), + (timedelta(hours=1), "pretty", "1h"), + (timedelta(hours=-1), "pretty", "-1h"), + (timedelta(minutes=1), "pretty", "1m"), + (timedelta(minutes=-1), "pretty", "-1m"), + (timedelta(seconds=1), "pretty", "1s"), + (timedelta(seconds=-1), "pretty", "-1s"), + (timedelta(milliseconds=1), "pretty", "1ms"), + (timedelta(milliseconds=-1), "pretty", "-1ms"), + (timedelta(microseconds=1), "pretty", "1µs"), + (timedelta(microseconds=-1), "pretty", "-1µs"), + ( + timedelta(weeks=1, days=1, hours=1, minutes=1, seconds=1, milliseconds=1, microseconds=1), + "pretty", + "8d 1h 1m 1s 1001µs", + ), + ( + timedelta(weeks=-1, days=-1, hours=-1, minutes=-1, seconds=-1, milliseconds=-1, microseconds=-1), + "pretty", + "-8d -1h -1m -1s -1001µs", + ), + ( + timedelta(weeks=1, days=-1, hours=1, minutes=-1, seconds=1, 
milliseconds=-1, microseconds=1), + "pretty", + "6d 59m 999001µs", + ), + (None, "pretty", None), + ], + ids=[ + # ISO 8601 format + "iso - positive weeks", + "iso - negative weeks", + "iso - positive days", + "iso - negative days", + "iso - positive hours", + "iso - negative hours", + "iso - positive minutes", + "iso - negative minutes", + "iso - positive seconds", + "iso - negative seconds", + "iso - positive milliseconds", + "iso - negative milliseconds", + "iso - positive microseconds", + "iso - negative microseconds", + "iso - all positive", + "iso - all negative", + "iso - mixed", + "iso - None", + # Pretty format + "pretty - positive weeks", + "pretty - negative weeks", + "pretty - positive days", + "pretty - negative days", + "pretty - positive hours", + "pretty - negative hours", + "pretty - positive minutes", + "pretty - negative minutes", + "pretty - positive seconds", + "pretty - negative seconds", + "pretty - positive milliseconds", + "pretty - negative milliseconds", + "pretty - positive microseconds", + "pretty - negative microseconds", + "pretty - all positive", + "pretty - all negative", + "pretty - mixed", + "pretty - None", + ], +) +def test_should_return_string_representation( + value: timedelta | None, + format_: Literal["iso", "pretty"], + expected: str | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.to_string(format=format_), + expected, + type_if_none=ColumnType.duration(), + ) From cac8bb0a6d2063d0d397f4b9e7f6c70e4dfa2769 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 17:50:36 +0100 Subject: [PATCH 19/57] WIP --- ..._operations.py => _datetime_operations.py} | 27 +++++++------- ...ations.py => _lazy_datetime_operations.py} | 11 +++--- .../tabular/query/_lazy_string_operations.py | 1 + .../data/tabular/query/_string_operations.py | 36 +++++++++---------- .../__init__.py | 0 5 files changed, 41 insertions(+), 34 deletions(-) rename src/safeds/data/tabular/query/{_temporal_operations.py 
=> _datetime_operations.py} (89%) rename src/safeds/data/tabular/query/{_lazy_temporal_operations.py => _lazy_datetime_operations.py} (93%) rename tests/safeds/data/tabular/query/{_lazy_temporal_operations => _lazy_datetime_operations}/__init__.py (100%) diff --git a/src/safeds/data/tabular/query/_temporal_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py similarity index 89% rename from src/safeds/data/tabular/query/_temporal_operations.py rename to src/safeds/data/tabular/query/_datetime_operations.py index 3889735a6..14dbf0889 100644 --- a/src/safeds/data/tabular/query/_temporal_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -8,11 +8,12 @@ # TODO: Examples with None +# TODO: add hour etc. -class TemporalOperations(ABC): +class DatetimeOperations(ABC): """ - Namespace for operations on temporal data. + Namespace for operations on datetimes, dates, and times. This class cannot be instantiated directly. It can only be accessed using the `dt` attribute of a cell. @@ -20,7 +21,7 @@ class TemporalOperations(ABC): -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.date_to_string("%Y/%m/%d")) +------------+ | example | @@ -51,7 +52,7 @@ def __sizeof__(self) -> int: ... def __str__(self) -> str: ... # ------------------------------------------------------------------------------------------------------------------ - # String operations + # Datetime operations # ------------------------------------------------------------------------------------------------------------------ @abstractmethod @@ -61,13 +62,14 @@ def century(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the century as integer. 
Examples -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.century()) +---------+ | example | @@ -85,13 +87,14 @@ def weekday(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the weekday as integer. Examples -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.weekday()) +---------+ | example | @@ -115,7 +118,7 @@ def week(self) -> Cell[int | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) + >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.week()) +---------+ | example | @@ -139,7 +142,7 @@ def year(self) -> Cell[int | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.year()) +---------+ | example | @@ -163,7 +166,7 @@ def month(self) -> Cell[int | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.month()) +---------+ | example | @@ -187,7 +190,7 @@ def day(self) -> Cell[int | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.day()) 
+---------+ | example | @@ -222,7 +225,7 @@ def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[s -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [ datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)]) + >>> column = Column("a", [ datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)]) >>> column.transform(lambda cell: cell.dt.datetime_to_string()) +---------------------+ | example | @@ -256,7 +259,7 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.date_to_string()) +------------+ | example | diff --git a/src/safeds/data/tabular/query/_lazy_temporal_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py similarity index 93% rename from src/safeds/data/tabular/query/_lazy_temporal_operations.py rename to src/safeds/data/tabular/query/_lazy_datetime_operations.py index 829fa76ec..75eb11b15 100644 --- a/src/safeds/data/tabular/query/_lazy_temporal_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -5,7 +5,7 @@ from safeds._utils import _structural_hash from safeds.data.tabular.containers._lazy_cell import _LazyCell -from ._temporal_operations import TemporalOperations +from ._datetime_operations import DatetimeOperations if TYPE_CHECKING: import polars as pl @@ -13,7 +13,7 @@ from safeds.data.tabular.containers._cell import Cell -class _LazyTemporalOperations(TemporalOperations): +class _LazyDatetimeOperations(DatetimeOperations): # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # 
------------------------------------------------------------------------------------------------------------------ @@ -22,7 +22,7 @@ def __init__(self, expression: pl.Expr) -> None: self._expression: pl.Expr = expression def __eq__(self, other: object) -> bool: - if not isinstance(other, _LazyTemporalOperations): + if not isinstance(other, _LazyDatetimeOperations): return NotImplemented if self is other: return True @@ -32,7 +32,7 @@ def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) def __repr__(self) -> str: - return f"_LazyTemporalOperations({self._expression})" + return f"_LazyDatetimeOperations({self._expression})" def __sizeof__(self) -> int: return self._expression.__sizeof__() @@ -62,6 +62,9 @@ def month(self) -> Cell[int | None]: def day(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.day()) + def hour(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.hour()) + def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str | None]: if not _check_format_string(format_string): raise ValueError("Invalid format string") diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index e0c603984..18de380fe 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -82,6 +82,7 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: def to_int(self, *, base: int = 10) -> Cell[int | None]: return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) + # TODO: keep this or just cast? 
def to_float(self) -> Cell[float | None]: import polars as pl diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 10e9b2493..b8845ecff 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -20,7 +20,7 @@ class StringOperations(ABC): Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.to_uppercase()) +---------+ | example | @@ -74,7 +74,7 @@ def contains(self, substring: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.count_if(lambda cell: cell.str.contains("b")) 2 """ @@ -97,7 +97,7 @@ def ends_with(self, suffix: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.count_if(lambda cell: cell.str.ends_with("c")) 1 """ @@ -120,7 +120,7 @@ def index_of(self, substring: str) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.index_of("b")) +---------+ | example | @@ -152,7 +152,7 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", "a", "abc"]) + >>> column = Column("a", ["", "a", "abc"]) >>> column.transform(lambda cell: cell.str.length()) +---------+ | example | @@ -185,7 +185,7 @@ def replace(self, old: str, new: str) -> Cell[str | 
None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.replace("b", "z")) +---------+ | example | @@ -216,7 +216,7 @@ def starts_with(self, prefix: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.count_if(lambda cell: cell.str.starts_with("a")) 1 """ @@ -247,7 +247,7 @@ def substring(self, start: int = 0, length: int | None = None) -> Cell[str | Non Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["abc", "def", "ghi"]) + >>> column = Column("a", ["abc", "def", "ghi"]) >>> column.transform(lambda cell: cell.str.substring(1, 2)) +---------+ | example | @@ -273,7 +273,7 @@ def to_date(self) -> Cell[datetime.date | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["2021-01-01", "2021-02-01", "abc"]) + >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc"]) >>> column.transform(lambda cell: cell.str.to_date()) +------------+ | example | @@ -299,7 +299,7 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc"]) + >>> column = Column("a", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc"]) >>> column.transform(lambda cell: cell.str.to_datetime()) +-------------------------+ | example | @@ -325,7 +325,7 @@ def to_float(self) -> Cell[float | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["1", "3.4", "5.6", "abc"]) + >>> column = Column("a", ["1", "3.4", "5.6", "abc"]) >>> column.transform(lambda 
cell: cell.str.to_float()) +---------+ | example | @@ -357,7 +357,7 @@ def to_int(self, *, base: int = 10) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("example", ["1", "2", "3", "abc"]) + >>> column1 = Column("a", ["1", "2", "3", "abc"]) >>> column1.transform(lambda cell: cell.str.to_int()) +---------+ | example | @@ -370,7 +370,7 @@ def to_int(self, *, base: int = 10) -> Cell[int | None]: | null | +---------+ - >>> column2 = Column("example", ["1", "10", "11", "abc"]) + >>> column2 = Column("a", ["1", "10", "11", "abc"]) >>> column2.transform(lambda cell: cell.str.to_int(base=2)) +---------+ | example | @@ -397,7 +397,7 @@ def to_lowercase(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["AB", "BC", "CD"]) + >>> column = Column("a", ["AB", "BC", "CD"]) >>> column.transform(lambda cell: cell.str.to_lowercase()) +---------+ | example | @@ -423,7 +423,7 @@ def to_uppercase(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.to_uppercase()) +---------+ | example | @@ -449,7 +449,7 @@ def trim(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc "]) >>> column.transform(lambda cell: cell.str.trim()) +---------+ | example | @@ -476,7 +476,7 @@ def trim_end(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc "]) >>> column.transform(lambda cell: cell.str.trim_end()) +---------+ | example | @@ -503,7 +503,7 @@ 
def trim_start(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc "]) >>> column.transform(lambda cell: cell.str.trim_start()) +---------+ | example | diff --git a/tests/safeds/data/tabular/query/_lazy_temporal_operations/__init__.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__init__.py similarity index 100% rename from tests/safeds/data/tabular/query/_lazy_temporal_operations/__init__.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/__init__.py From 4e9def34424ec2955e95a959ff62784fd3b6be1a Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 18:12:29 +0100 Subject: [PATCH 20/57] docs: update outputs --- .../tabular/query/_lazy_string_operations.py | 29 +- .../data/tabular/query/_string_operations.py | 375 ++++++++++-------- .../_lazy_string_operations/test_substring.py | 2 +- .../_lazy_string_operations/test_to_float.py | 24 -- 4 files changed, 214 insertions(+), 216 deletions(-) delete mode 100644 tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 18de380fe..5e6f751ae 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations @@ -13,6 +12,7 @@ import polars as pl + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers._cell import Cell @@ -47,7 +47,7 @@ def __str__(self) -> str: # String operations # 
------------------------------------------------------------------------------------------------------------------ - def contains(self, substring: str) -> Cell[bool | None]: + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.contains(substring, literal=True)) def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: @@ -56,21 +56,24 @@ def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: else: return _LazyCell(self._expression.str.len_chars()) - def ends_with(self, suffix: str) -> Cell[bool | None]: + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.ends_with(suffix)) - def index_of(self, substring: str) -> Cell[int | None]: + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: return _LazyCell(self._expression.str.find(substring, literal=True)) - def replace(self, old: str, new: str) -> Cell[str | None]: + def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - def starts_with(self, prefix: str) -> Cell[bool | None]: + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: return _LazyCell(self._expression.str.starts_with(prefix)) - def substring(self, start: int = 0, length: int | None = None) -> Cell[str | None]: - _check_bounds("length", length, lower_bound=_ClosedBound(0)) - + def substring( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: return _LazyCell(self._expression.str.slice(start, length)) def to_date(self) -> Cell[datetime.date | None]: @@ -79,15 +82,9 @@ def to_date(self) -> Cell[datetime.date | None]: def to_datetime(self) -> Cell[datetime.datetime | None]: return _LazyCell(self._expression.str.to_datetime(format="%+", strict=False)) - def to_int(self, *, 
base: int = 10) -> Cell[int | None]: + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) - # TODO: keep this or just cast? - def to_float(self) -> Cell[float | None]: - import polars as pl - - return _LazyCell(self._expression.cast(pl.Float64, strict=False)) - def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index b8845ecff..ee42113d3 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -6,9 +6,14 @@ if TYPE_CHECKING: import datetime + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell # TODO: examples with None +# TODO: add more methods +# - reverse +# - to_time +# - ... class StringOperations(ABC): @@ -22,15 +27,15 @@ class StringOperations(ABC): >>> from safeds.data.tabular.containers import Column >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.to_uppercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | AB | - | BC | - | CD | - +---------+ + +-----+ + | a | + | --- | + | str | + +=====+ + | AB | + | BC | + | CD | + +-----+ """ # ------------------------------------------------------------------------------------------------------------------ @@ -57,7 +62,7 @@ def __str__(self) -> str: ... # ------------------------------------------------------------------------------------------------------------------ @abstractmethod - def contains(self, substring: str) -> Cell[bool | None]: + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell contains the substring. 
@@ -74,13 +79,22 @@ def contains(self, substring: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.contains("b")) - 2 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.contains("b")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | true | + | false | + | null | + +-------+ """ @abstractmethod - def ends_with(self, suffix: str) -> Cell[bool | None]: + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell ends with the suffix. @@ -97,13 +111,22 @@ def ends_with(self, suffix: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.ends_with("c")) - 1 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.ends_with("c")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | false | + | null | + +-------+ """ @abstractmethod - def index_of(self, substring: str) -> Cell[int | None]: + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: """ Get the index of the first occurrence of the substring in the string value in the cell. 
@@ -120,17 +143,18 @@ def index_of(self, substring: str) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.index_of("b")) - +---------+ - | example | - | --- | - | u32 | - +=========+ - | 1 | - | 0 | - | null | - +---------+ + +------+ + | a | + | --- | + | u32 | + +======+ + | 1 | + | 0 | + | null | + | null | + +------+ """ @abstractmethod @@ -152,21 +176,22 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", "a", "abc"]) + >>> column = Column("a", ["", "a", "abc", None]) >>> column.transform(lambda cell: cell.str.length()) - +---------+ - | example | - | --- | - | u32 | - +=========+ - | 0 | - | 1 | - | 3 | - +---------+ + +------+ + | a | + | --- | + | u32 | + +======+ + | 0 | + | 1 | + | 3 | + | null | + +------+ """ @abstractmethod - def replace(self, old: str, new: str) -> Cell[str | None]: + def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: """ Replace occurrences of the old substring with the new substring in the string value in the cell. 
@@ -185,21 +210,22 @@ def replace(self, old: str, new: str) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.replace("b", "z")) - +---------+ - | example | - | --- | - | str | - +=========+ - | az | - | zc | - | cd | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | az | + | zc | + | cd | + | null | + +------+ """ @abstractmethod - def starts_with(self, prefix: str) -> Cell[bool | None]: + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell starts with the prefix. @@ -216,13 +242,27 @@ def starts_with(self, prefix: str) -> Cell[bool | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.starts_with("a")) - 1 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.starts_with("a")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | false | + | null | + +-------+ """ @abstractmethod - def substring(self, start: int = 0, length: int | None = None) -> Cell[str | None]: + def substring( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: """ Get a substring of the string value in the cell. 
@@ -247,19 +287,21 @@ def substring(self, start: int = 0, length: int | None = None) -> Cell[str | Non Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["abc", "def", "ghi"]) - >>> column.transform(lambda cell: cell.str.substring(1, 2)) - +---------+ - | example | - | --- | - | str | - +=========+ - | bc | - | ef | - | hi | - +---------+ - """ - + >>> column = Column("a", ["abc", "def", "ghi", None]) + >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) + +------+ + | a | + | --- | + | str | + +======+ + | bc | + | ef | + | hi | + | null | + +------+ + """ + + # TODO: add format parameter @abstractmethod def to_date(self) -> Cell[datetime.date | None]: """ @@ -273,19 +315,21 @@ def to_date(self) -> Cell[datetime.date | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc"]) + >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) >>> column.transform(lambda cell: cell.str.to_date()) +------------+ - | example | + | a | | --- | | date | +============+ | 2021-01-01 | | 2021-02-01 | | null | + | null | +------------+ """ + # TODO: add format parameter @abstractmethod def to_datetime(self) -> Cell[datetime.datetime | None]: """ @@ -299,48 +343,22 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc"]) + >>> column = Column("a", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc", None]) >>> column.transform(lambda cell: cell.str.to_datetime()) +-------------------------+ - | example | + | a | | --- | | datetime[μs, UTC] | +=========================+ | 2021-01-01 00:00:00 UTC | | 2021-02-01 00:00:00 UTC | | null | + | null | +-------------------------+ """ @abstractmethod - def to_float(self) -> Cell[float | None]: - """ - Convert the string 
value in the cell to a float. - - Returns - ------- - float: - The float value. If the string cannot be converted to a float, None is returned. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["1", "3.4", "5.6", "abc"]) - >>> column.transform(lambda cell: cell.str.to_float()) - +---------+ - | example | - | --- | - | f64 | - +=========+ - | 1.00000 | - | 3.40000 | - | 5.60000 | - | null | - +---------+ - """ - - @abstractmethod - def to_int(self, *, base: int = 10) -> Cell[int | None]: + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: """ Convert the string value in the cell to an integer. @@ -357,31 +375,33 @@ def to_int(self, *, base: int = 10) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("a", ["1", "2", "3", "abc"]) + >>> column1 = Column("a", ["1", "2", "3", "abc", None]) >>> column1.transform(lambda cell: cell.str.to_int()) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 1 | - | 2 | - | 3 | - | null | - +---------+ - - >>> column2 = Column("a", ["1", "10", "11", "abc"]) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | 3 | + | null | + | null | + +------+ + + >>> column2 = Column("a", ["1", "10", "11", "abc", None]) >>> column2.transform(lambda cell: cell.str.to_int(base=2)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 1 | - | 2 | - | 3 | - | null | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | 3 | + | null | + | null | + +------+ """ @abstractmethod @@ -397,17 +417,18 @@ def to_lowercase(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["AB", "BC", "CD"]) + >>> column = Column("a", ["AB", "BC", "CD", None]) >>> column.transform(lambda cell: cell.str.to_lowercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | ab | 
- | bc | - | cd | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc | + | cd | + | null | + +------+ """ @abstractmethod @@ -423,17 +444,18 @@ def to_uppercase(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.to_uppercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | AB | - | BC | - | CD | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | AB | + | BC | + | CD | + | null | + +------+ """ @abstractmethod @@ -449,18 +471,19 @@ def trim(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ @abstractmethod @@ -476,18 +499,19 @@ def trim_end(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim_end()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ @abstractmethod @@ -503,16 +527,17 @@ def trim_start(self) -> Cell[str | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", 
"abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim_start()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py index ab2496486..8d1164a38 100644 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py @@ -28,7 +28,7 @@ ], ) def test_should_return_substring(string: str, start: int, length: int | None, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.substring(start, length), expected) + assert_cell_operation_works(string, lambda cell: cell.str.substring(start=start, length=length), expected) def test_should_raise_if_length_is_negative() -> None: diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py deleted file mode 100644 index f9ea7ef9c..000000000 --- a/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_float.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", None), - ("11", 11), - ("11.5", 11.5), - ("10e-1", 1.0), - ("abc", None), - ], - ids=[ - "empty", - "integer", - "float", - "scientific notation", - "invalid string", - ], -) -def test_should_parse_float(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_float(), expected) From 815eb46f40aa6ff24151d2b34f64c868c8288260 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 18:15:42 +0100 Subject: [PATCH 21/57] docs: update 
outputs --- .../tabular/query/_datetime_operations.py | 103 +++++++++--------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 14dbf0889..28248fc35 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -22,13 +22,13 @@ class DatetimeOperations(ABC): >>> from safeds.data.tabular.containers import Column >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.date_to_string("%Y/%m/%d")) + >>> column.transform(lambda cell: cell.dt.date_to_string("%d.%m.%Y")) +------------+ - | example | + | a | | --- | | str | +============+ - | 2022/01/09 | + | 09.01.2022 | +------------+ """ @@ -71,13 +71,13 @@ def century(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.century()) - +---------+ - | example | - | --- | - | i32 | - +=========+ - | 21 | - +---------+ + +-----+ + | a | + | --- | + | i32 | + +=====+ + | 21 | + +-----+ """ @abstractmethod @@ -96,13 +96,13 @@ def weekday(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.weekday()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 6 | - +---------+ + +-----+ + | a | + | --- | + | i8 | + +=====+ + | 6 | + +-----+ """ @abstractmethod @@ -112,6 +112,7 @@ def week(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the week as integer. 
Examples @@ -120,13 +121,13 @@ def week(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 1)]) >>> column.transform(lambda cell: cell.dt.week()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 52 | - +---------+ + +-----+ + | a | + | --- | + | i8 | + +=====+ + | 52 | + +-----+ """ @abstractmethod @@ -136,6 +137,7 @@ def year(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the year as integer. Examples @@ -144,13 +146,13 @@ def year(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.year()) - +---------+ - | example | - | --- | - | i32 | - +=========+ - | 2022 | - +---------+ + +------+ + | a | + | --- | + | i32 | + +======+ + | 2022 | + +------+ """ @abstractmethod @@ -160,6 +162,7 @@ def month(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the month as integer. Examples @@ -168,13 +171,13 @@ def month(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.month()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 1 | - +---------+ + +-----+ + | a | + | --- | + | i8 | + +=====+ + | 1 | + +-----+ """ @abstractmethod @@ -184,6 +187,7 @@ def day(self) -> Cell[int | None]: Returns ------- + cell: A cell containing the day as integer. Examples @@ -192,13 +196,13 @@ def day(self) -> Cell[int | None]: >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.day()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 9 | - +---------+ + +-----+ + | a | + | --- | + | i8 | + +=====+ + | 9 | + +-----+ """ @abstractmethod @@ -213,7 +217,7 @@ def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[s Returns ------- - date: + cell: The string value. 
Raises @@ -228,7 +232,7 @@ def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[s >>> column = Column("a", [ datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)]) >>> column.transform(lambda cell: cell.dt.datetime_to_string()) +---------------------+ - | example | + | a | | --- | | str | +=====================+ @@ -248,10 +252,9 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: Returns ------- - date: + cell: The string value. - ValueError If the formatstring is invalid. @@ -262,7 +265,7 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.date_to_string()) +------------+ - | example | + | a | | --- | | str | +============+ From 614b180f076bed89369639039895e7f3853e5351 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 18:20:04 +0100 Subject: [PATCH 22/57] feat: only one method to convert datetime values to string --- .../tabular/query/_datetime_operations.py | 48 +++---------------- .../query/_lazy_datetime_operations.py | 8 +--- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 28248fc35..a11b2f73b 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -19,16 +19,17 @@ class DatetimeOperations(ABC): Examples -------- + >>> from datetime import date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.date_to_string("%d.%m.%Y")) + >>> column = Column("a", [date(2022, 1, 9), date(2024, 6, 12)]) + >>> column.transform(lambda cell: cell.dt.to_string("%d.%m.%Y")) +------------+ | a | | --- | | str | +============+ | 09.01.2022 | + | 12.06.2024 | 
+------------+ """ @@ -206,44 +207,9 @@ def day(self) -> Cell[int | None]: """ @abstractmethod - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str | None]: + def to_string(self, format_string: str = "%F") -> Cell[str | None]: """ - Convert the date value in the cell to a string. - - Parameters - ---------- - format_string: - The format string it will be used to convert the data into the string. - - Returns - ------- - cell: - The string value. - - Raises - ------ - ValueError - If the formatstring is invalid. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [ datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)]) - >>> column.transform(lambda cell: cell.dt.datetime_to_string()) - +---------------------+ - | a | - | --- | - | str | - +=====================+ - | 2022/01/09 23:29:01 | - +---------------------+ - """ - - @abstractmethod - def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: - """ - Convert the date value in the cell to a string. + Convert the datetime/date/time value in the cell to a string. 
Parameters ---------- @@ -263,7 +229,7 @@ def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: >>> from safeds.data.tabular.containers import Column >>> import datetime >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.date_to_string()) + >>> column.transform(lambda cell: cell.dt.to_string()) +------------+ | a | | --- | diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index 75eb11b15..cfd66ea67 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -65,17 +65,11 @@ def day(self) -> Cell[int | None]: def hour(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.hour()) - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str | None]: + def to_string(self, format_string: str = "%F") -> Cell[str | None]: if not _check_format_string(format_string): raise ValueError("Invalid format string") return _LazyCell(self._expression.dt.to_string(format=format_string)) - def date_to_string(self, format_string: str = "%F") -> Cell[str | None]: - if not _check_format_string(format_string): - # Fehler in _check_format_string - raise ValueError("Invalid format string") - return _LazyCell(self._expression.dt.to_string(format=format_string)) - def _check_format_string(format_string: str) -> bool: valid_format_codes = { From 0c910a6c16f13ad23b21e72494d7c7d7992653fd Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 19:53:27 +0100 Subject: [PATCH 23/57] feat: add more datetime operations --- .../tabular/query/_datetime_operations.py | 726 ++++++++++++++++-- .../query/_lazy_datetime_operations.py | 65 +- 2 files changed, 694 insertions(+), 97 deletions(-) diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 
a11b2f73b..9f96dc539 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + import datetime as python_datetime + from safeds.data.tabular.containers import Cell @@ -22,15 +24,15 @@ class DatetimeOperations(ABC): >>> from datetime import date >>> from safeds.data.tabular.containers import Column >>> column = Column("a", [date(2022, 1, 9), date(2024, 6, 12)]) - >>> column.transform(lambda cell: cell.dt.to_string("%d.%m.%Y")) - +------------+ - | a | - | --- | - | str | - +============+ - | 09.01.2022 | - | 12.06.2024 | - +------------+ + >>> column.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2022 | + | 2024 | + +------+ """ # ------------------------------------------------------------------------------------------------------------------ @@ -53,167 +55,691 @@ def __sizeof__(self) -> int: ... def __str__(self) -> str: ... # ------------------------------------------------------------------------------------------------------------------ - # Datetime operations + # Extract components # ------------------------------------------------------------------------------------------------------------------ @abstractmethod def century(self) -> Cell[int | None]: """ - Get the century of the underlying date(time) data. + Extract the century from a datetime or date. + + Note that since our calendar begins with year 1 the first century lasts from year 1 to year 100. Subsequent + centuries begin with years ending in "01" and end with years ending in "00". Returns ------- cell: - A cell containing the century as integer. + The century. 
Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.century()) - +-----+ - | a | - | --- | - | i32 | - +=====+ - | 21 | - +-----+ + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.century()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 20 | + | 20 | + | 21 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2001, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.century()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 20 | + | 20 | + | 21 | + | null | + +------+ """ @abstractmethod - def weekday(self) -> Cell[int | None]: + def date(self) -> Cell[python_datetime.date | None]: """ - Get the weekday of the underlying date(time) data. + Extract the date from a datetime. Returns ------- cell: - A cell containing the weekday as integer. + The date. Examples -------- + >>> from datetime import datetime >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.weekday()) - +-----+ - | a | - | --- | - | i8 | - +=====+ - | 6 | - +-----+ + >>> column = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column.transform(lambda cell: cell.dt.date()) + +------------+ + | a | + | --- | + | date | + +============+ + | 1999-12-31 | + | 2000-01-01 | + | null | + +------------+ """ @abstractmethod - def week(self) -> Cell[int | None]: + def day(self) -> Cell[int | None]: """ - Get the week of the underlying date(time) data. + Extract the day from a datetime or date. Returns ------- cell: - A cell containing the week as integer. + The day. 
Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.week()) - +-----+ - | a | - | --- | - | i8 | - +=====+ - | 52 | - +-----+ + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.day()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 31 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.day()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 31 | + | 1 | + | null | + +------+ """ @abstractmethod - def year(self) -> Cell[int | None]: + def day_of_year(self) -> Cell[int | None]: """ - Get the year of the underlying date(time) data. + Extract the day of the year from a datetime or date. + + The day of the year is a number between 1 and 366. A 366th day only occurs in leap years. Returns ------- cell: - A cell containing the year as integer. + The day of the year. 
Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.year()) + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 12, 31), None]) + >>> column1.transform(lambda cell: cell.dt.day_of_year()) + +------+ + | a | + | --- | + | i16 | + +======+ + | 365 | + | 1 | + | 366 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2000, 12, 31), None]) + >>> column2.transform(lambda cell: cell.dt.day_of_year()) + +------+ + | a | + | --- | + | i16 | + +======+ + | 365 | + | 1 | + | 366 | + | null | + +------+ + """ + + @abstractmethod + def hour(self) -> Cell[int | None]: + """ + Extract the hour from a datetime or time. + + Returns + ------- + cell: + The hour. + + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, hour=0), datetime(2000, 1, 1, hour=12), None]) + >>> column1.transform(lambda cell: cell.dt.hour()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 12 | + | null | + +------+ + + >>> column2 = Column("a", [time(hour=0), time(hour=12), None]) + >>> column2.transform(lambda cell: cell.dt.hour()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 12 | + | null | + +------+ + """ + + @abstractmethod + def microsecond(self) -> Cell[int | None]: + """ + Extract the microsecond from a datetime or time. + + Returns + ------- + cell: + The microsecond. 
+ + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, microsecond=0), datetime(2000, 1, 1, microsecond=500), None]) + >>> column1.transform(lambda cell: cell.dt.microsecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + + >>> column2 = Column("a", [time(microsecond=0), time(microsecond=500), None]) + >>> column2.transform(lambda cell: cell.dt.microsecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + """ + + @abstractmethod + def millennium(self) -> Cell[int | None]: + """ + Extract the millennium from a datetime or date. + + Note that since our calendar begins with year 1 the first millennium lasts from year 1 to year 1000. Subsequent + centuries begin with years ending in "001" and end with years ending in "000". + + Returns + ------- + cell: + The millennium. + + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.millennium()) +------+ | a | | --- | | i32 | +======+ - | 2022 | + | 2 | + | 2 | + | 3 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2001, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.millennium()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2 | + | 2 | + | 3 | + | null | + +------+ + """ + + @abstractmethod + def millisecond(self) -> Cell[int | None]: + """ + Extract the millisecond from a datetime or time. + + Returns + ------- + cell: + The millisecond. 
+ + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, microsecond=0), datetime(2000, 1, 1, microsecond=500000), None]) + >>> column1.transform(lambda cell: cell.dt.millisecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + + >>> column2 = Column("a", [time(microsecond=0), time(microsecond=500000), None]) + >>> column2.transform(lambda cell: cell.dt.millisecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + """ + + @abstractmethod + def minute(self) -> Cell[int | None]: + """ + Extract the minute from a datetime or time. + + Returns + ------- + cell: + The minute. + + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, minute=0), datetime(2000, 1, 1, minute=30), None]) + >>> column1.transform(lambda cell: cell.dt.minute()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + + >>> column2 = Column("a", [time(minute=0), time(minute=30), None]) + >>> column2.transform(lambda cell: cell.dt.minute()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | +------+ """ @abstractmethod def month(self) -> Cell[int | None]: """ - Get the month of the underlying date(time) data. + Extract the month from a datetime or date. Returns ------- cell: - A cell containing the month as integer. + The month. 
Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.month()) - +-----+ - | a | - | --- | - | i8 | - +=====+ - | 1 | - +-----+ + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.month()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 12 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.month()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 12 | + | 1 | + | null | + +------+ """ @abstractmethod - def day(self) -> Cell[int | None]: + def quarter(self) -> Cell[int | None]: """ - Get the day of the underlying date(time) data. + Extract the quarter from a datetime or date. + + The quarter is a number between 1 and 4: + + - 1: January to March + - 2: April to June + - 3: July to September + - 4: October to December Returns ------- cell: - A cell containing the day as integer. + The quarter. 
Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.day()) - +-----+ - | a | - | --- | - | i8 | - +=====+ - | 9 | - +-----+ + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 4, 1), None]) + >>> column1.transform(lambda cell: cell.dt.quarter()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 4 | + | 1 | + | 2 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2000, 4, 1), None]) + >>> column2.transform(lambda cell: cell.dt.quarter()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 4 | + | 1 | + | 2 | + | null | + +------+ + """ + + @abstractmethod + def second(self) -> Cell[int | None]: + """ + Extract the second from a datetime or time. + + Returns + ------- + cell: + The second. + + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, second=0), datetime(2000, 1, 1, second=30), None]) + >>> column1.transform(lambda cell: cell.dt.second()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + + >>> column2 = Column("a", [time(second=0), time(second=30), None]) + >>> column2.transform(lambda cell: cell.dt.second()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + """ + + @abstractmethod + def time(self) -> Cell[python_datetime.time | None]: + """ + Extract the time from a datetime. + + Returns + ------- + cell: + The time. 
+ + Examples + -------- + >>> from datetime import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column.transform(lambda cell: cell.dt.time()) + +----------+ + | a | + | --- | + | time | + +==========+ + | 00:00:00 | + | 12:30:00 | + | null | + +----------+ + """ + + @abstractmethod + def week(self) -> Cell[int | None]: + """ + Extract the week from a datetime or date. + + The week is a number between 1 and 53. The first week of a year starts at the first Monday of the year. + Subsequent weeks start on Monday and end on Sunday. + + Returns + ------- + cell: + The week. + + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 2), datetime(2000, 1, 3), None]) + >>> column1.transform(lambda cell: cell.dt.week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 52 | + | 52 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 2), date(2000, 1, 4), None]) + >>> column2.transform(lambda cell: cell.dt.week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 52 | + | 52 | + | 1 | + | null | + +------+ + """ + + @abstractmethod + def weekday(self) -> Cell[int | None]: + """ + Extract the weekday from a datetime or date. + + The weekday is a number between 1 (Monday) and 7 (Sunday). + + Returns + ------- + cell: + The weekday. 
+ + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1), datetime(2000, 1, 2), None]) + >>> column1.transform(lambda cell: cell.dt.weekday()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ + + >>> column2 = Column("a", [date(2000, 1, 1), date(2000, 1, 2), None]) + >>> column2.transform(lambda cell: cell.dt.weekday()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ """ @abstractmethod - def to_string(self, format_string: str = "%F") -> Cell[str | None]: + def year(self) -> Cell[int | None]: + """ + Extract the year from a datetime or date. + + Returns + ------- + cell: + The year. + + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 1999 | + | 2000 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 1999 | + | 2000 | + | null | + +------+ + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Other operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def is_in_leap_year(self) -> Cell[bool | None]: + """ + Check a datetime or date is in a leap year. + + Returns + ------- + cell: + Whether the year is a leap year. 
+ + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1900, 1, 1), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.is_in_leap_year()) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ + + >>> column2 = Column("a", [date(1900, 1, 1), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.is_in_leap_year()) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ + """ + + # TODO: update documentation + @abstractmethod + def to_string(self, *, format: str = "iso") -> Cell[str | None]: """ Convert the datetime/date/time value in the cell to a string. Parameters ---------- - format_string: + format: The format string it will be used to convert the data into the string. Returns @@ -221,14 +747,16 @@ def to_string(self, format_string: str = "%F") -> Cell[str | None]: cell: The string value. + Raises + ------ ValueError - If the formatstring is invalid. + If the format is invalid. Examples -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) + >>> column = Column("a", [datetime.datetime(2022, 1, 9, 1)]) >>> column.transform(lambda cell: cell.dt.to_string()) +------------+ | a | @@ -238,3 +766,33 @@ def to_string(self, format_string: str = "%F") -> Cell[str | None]: | 2022-01-09 | +------------+ """ + + @abstractmethod + def unix_time(self) -> Cell[int | None]: + """ + Get the Unix time from a datetime. + + The Unix time is the elapsed time since 00:00:00 UTC on 1 January 1970. This method returns the Unix time in + microseconds. + + Returns + ------- + cell: + The Unix time. 
+ + Examples + -------- + >>> from datetime import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime(1970, 1, 1), datetime(1970, 1, 2), None]) + >>> column.transform(lambda cell: cell.dt.unix_time()) + +-------------+ + | a | + | --- | + | i64 | + +=============+ + | 0 | + | 86400000000 | + | null | + +-------------+ + """ diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index cfd66ea67..a67b1500f 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -8,6 +8,8 @@ from ._datetime_operations import DatetimeOperations if TYPE_CHECKING: + import datetime as python_datetime + import polars as pl from safeds.data.tabular.containers._cell import Cell @@ -41,34 +43,71 @@ def __str__(self) -> str: return f"({self._expression}).dt" # ------------------------------------------------------------------------------------------------------------------ - # Temporal operations + # Extract components # ------------------------------------------------------------------------------------------------------------------ def century(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.century()) - def weekday(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.weekday()) + def date(self) -> Cell[python_datetime.date | None]: + return _LazyCell(self._expression.dt.date()) + + def day(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.day()) + + def day_of_year(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.ordinal_day()) + + def hour(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.hour()) + + def microsecond(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.microsecond()) + + def millennium(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.millennium()) + + def 
millisecond(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.millisecond()) + + def minute(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.minute()) + + def month(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.month()) + + def quarter(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.quarter()) + + def second(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.second()) + + def time(self) -> Cell[python_datetime.time | None]: + return _LazyCell(self._expression.dt.time()) def week(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.week()) + def weekday(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.weekday()) + def year(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.year()) - def month(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.month()) - - def day(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.day()) + # ------------------------------------------------------------------------------------------------------------------ + # Other operations + # ------------------------------------------------------------------------------------------------------------------ - def hour(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.hour()) + def is_in_leap_year(self) -> Cell[bool | None]: + return _LazyCell(self._expression.dt.is_leap_year()) - def to_string(self, format_string: str = "%F") -> Cell[str | None]: - if not _check_format_string(format_string): + def to_string(self, *, format: str = "iso:strict") -> Cell[str | None]: + if not _check_format_string(format): raise ValueError("Invalid format string") - return _LazyCell(self._expression.dt.to_string(format=format_string)) + return _LazyCell(self._expression.dt.to_string(format=format)) + + def unix_time(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.epoch()) def _check_format_string(format_string: str) -> bool: From 
1a3b12320d27bc27db7ba0a5d64d0df77fdb4c1e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Jan 2025 20:15:31 +0100 Subject: [PATCH 24/57] feat: add `replace` method --- .../tabular/query/_datetime_operations.py | 85 +++++++++++++++++-- .../query/_lazy_datetime_operations.py | 30 ++++++- 2 files changed, 104 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 9f96dc539..7f0bf8716 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -6,13 +6,10 @@ if TYPE_CHECKING: import datetime as python_datetime + from safeds._typing import _ConvertibleToIntCell from safeds.data.tabular.containers import Cell -# TODO: Examples with None -# TODO: add hour etc. - - class DatetimeOperations(ABC): """ Namespace for operations on datetimes, dates, and times. @@ -731,21 +728,91 @@ def is_in_leap_year(self) -> Cell[bool | None]: +-------+ """ - # TODO: update documentation + @abstractmethod + def replace( + self, + *, + year: _ConvertibleToIntCell = None, + month: _ConvertibleToIntCell = None, + day: _ConvertibleToIntCell = None, + hour: _ConvertibleToIntCell = None, + minute: _ConvertibleToIntCell = None, + second: _ConvertibleToIntCell = None, + microsecond: _ConvertibleToIntCell = None, + ) -> Cell: + """ + Replace components of a datetime or date. + + If a component is not provided, it is not changed. Components that are not applicable to the object are ignored, + e.g. setting the hour of a date. Invalid results are converted to missing values (`None`). + + Parameters + ---------- + year: + The new year. + month: + The new month. Must be between 1 and 12. + day: + The new day. Must be between 1 and 31. + hour: + The new hour. Must be between 0 and 23. + minute: + The new minute. Must be between 0 and 59. + second: + The new second. Must be between 0 and 59. 
+ microsecond: + The new microsecond. Must be between 0 and 999999. + + Returns + ------- + cell: + The new datetime or date. + + Examples + -------- + >>> from datetime import datetime, date, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | 2000-02-02 02:00:00 | + | null | + +---------------------+ + + >>> column2 = Column("a", [date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) + +------------+ + | a | + | --- | + | date | + +============+ + | 2000-02-02 | + | null | + +------------+ + """ + + # TODO: explain format string + more examples @abstractmethod def to_string(self, *, format: str = "iso") -> Cell[str | None]: """ - Convert the datetime/date/time value in the cell to a string. + Convert a datetime, date, or time to a string. + + The format can be either the special value "iso" to create ISO 8601 strings or a custom format string. The + custom format string can contain the following placeholders: Parameters ---------- format: - The format string it will be used to convert the data into the string. + The format to use. Returns ------- cell: - The string value. + The string representation. 
Raises ------ @@ -756,7 +823,7 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: -------- >>> from safeds.data.tabular.containers import Column >>> import datetime - >>> column = Column("a", [datetime.datetime(2022, 1, 9, 1)]) + >>> column = Column("a", [datetime.date(2022, 1, 9)]) >>> column.transform(lambda cell: cell.dt.to_string()) +------------+ | a | diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index a67b1500f..bbdb6f661 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -12,6 +12,7 @@ import polars as pl + from safeds._typing import _ConvertibleToIntCell from safeds.data.tabular.containers._cell import Cell @@ -101,8 +102,33 @@ def year(self) -> Cell[int | None]: def is_in_leap_year(self) -> Cell[bool | None]: return _LazyCell(self._expression.dt.is_leap_year()) - def to_string(self, *, format: str = "iso:strict") -> Cell[str | None]: - if not _check_format_string(format): + def replace( + self, + *, + year: _ConvertibleToIntCell = None, + month: _ConvertibleToIntCell = None, + day: _ConvertibleToIntCell = None, + hour: _ConvertibleToIntCell = None, + minute: _ConvertibleToIntCell = None, + second: _ConvertibleToIntCell = None, + microsecond: _ConvertibleToIntCell = None, + ) -> Cell: + return _LazyCell( + self._expression.dt.replace( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + ) + + def to_string(self, *, format: str = "iso") -> Cell[str | None]: + if format == "iso": + format = "iso:strict" # noqa: A001 + elif not _check_format_string(format): raise ValueError("Invalid format string") return _LazyCell(self._expression.dt.to_string(format=format)) From edd4b5850b0910106ab6b18e8eebc099d175c81e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sat, 18 Jan 2025 17:30:56 +0100 Subject: [PATCH 
25/57] refactor: extract method to get similar strings --- src/safeds/_utils/__init__.py | 9 +- src/safeds/_utils/_string.py | 9 ++ src/safeds/_validation/__init__.py | 3 + .../_check_columns_exist_module.py | 14 +-- ...onvert_and_check_datetime_format_module.py | 110 ++++++++++++++++++ .../_normalize_and_check_file_path_module.py | 4 +- .../safeds/_utils/test_get_similar_strings.py | 38 ++++++ tests/safeds/_validation/__init__.py | 0 .../_validation/test_get_similar_columns.py | 39 ------- 9 files changed, 170 insertions(+), 56 deletions(-) create mode 100644 src/safeds/_utils/_string.py create mode 100644 src/safeds/_validation/_convert_and_check_datetime_format_module.py create mode 100644 tests/safeds/_utils/test_get_similar_strings.py delete mode 100644 tests/safeds/_validation/__init__.py delete mode 100644 tests/safeds/_validation/test_get_similar_columns.py diff --git a/src/safeds/_utils/__init__.py b/src/safeds/_utils/__init__.py index fc3d501d4..ca7eabfa8 100644 --- a/src/safeds/_utils/__init__.py +++ b/src/safeds/_utils/__init__.py @@ -10,16 +10,18 @@ from ._lazy import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema from ._plotting import _figure_to_image from ._random import _get_random_seed + from ._string import _get_similar_strings apipkg.initpkg( __name__, { "_compute_duplicates": "._collections:_compute_duplicates", - "_structural_hash": "._hashing:_structural_hash", - "_safe_collect_lazy_frame": "._lazy:_safe_collect_lazy_frame", - "_safe_collect_lazy_frame_schema": "._lazy:_safe_collect_lazy_frame_schema", "_figure_to_image": "._plotting:_figure_to_image", "_get_random_seed": "._random:_get_random_seed", + "_get_similar_strings": "._string:_get_similar_strings", + "_safe_collect_lazy_frame": "._lazy:_safe_collect_lazy_frame", + "_safe_collect_lazy_frame_schema": "._lazy:_safe_collect_lazy_frame_schema", + "_structural_hash": "._hashing:_structural_hash", }, ) @@ -27,6 +29,7 @@ "_compute_duplicates", "_figure_to_image", 
"_get_random_seed", + "_get_similar_strings", "_safe_collect_lazy_frame", "_safe_collect_lazy_frame_schema", "_structural_hash", diff --git a/src/safeds/_utils/_string.py b/src/safeds/_utils/_string.py new file mode 100644 index 000000000..40bf86f6b --- /dev/null +++ b/src/safeds/_utils/_string.py @@ -0,0 +1,9 @@ +def _get_similar_strings(string: str, valid_strings: list[str]) -> list[str]: + from difflib import get_close_matches + + close_matches = get_close_matches(string, valid_strings, n=3) + + if close_matches and close_matches[0] == string: + return close_matches[0:1] + else: + return close_matches diff --git a/src/safeds/_validation/__init__.py b/src/safeds/_validation/__init__.py index efb49c1b6..ecf75ca06 100644 --- a/src/safeds/_validation/__init__.py +++ b/src/safeds/_validation/__init__.py @@ -13,6 +13,7 @@ from ._check_indices_module import _check_indices from ._check_row_counts_are_equal_module import _check_row_counts_are_equal from ._check_schema_module import _check_schema + from ._convert_and_check_datetime_format_module import _convert_and_check_datetime_format from ._normalize_and_check_file_path_module import _normalize_and_check_file_path apipkg.initpkg( @@ -29,6 +30,7 @@ "_check_indices": "._check_indices_module:_check_indices", "_check_row_counts_are_equal": "._check_row_counts_are_equal_module:_check_row_counts_are_equal", "_check_schema": "._check_schema_module:_check_schema", + "_convert_and_check_datetime_format": "._convert_and_check_datetime_format_module:_convert_and_check_datetime_format", "_normalize_and_check_file_path": "._normalize_and_check_file_path_module:_normalize_and_check_file_path", }, ) @@ -45,5 +47,6 @@ "_check_indices", "_check_row_counts_are_equal", "_check_schema", + "_convert_and_check_datetime_format", "_normalize_and_check_file_path", ] diff --git a/src/safeds/_validation/_check_columns_exist_module.py b/src/safeds/_validation/_check_columns_exist_module.py index 7e1724921..d426f148b 100644 --- 
a/src/safeds/_validation/_check_columns_exist_module.py +++ b/src/safeds/_validation/_check_columns_exist_module.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _get_similar_strings from safeds.exceptions import ColumnNotFoundError if TYPE_CHECKING: @@ -52,20 +53,9 @@ def _build_error_message(schema: Schema, unknown_names: list[str]) -> str: result = "Could not find column(s):" for unknown_name in unknown_names: - similar_columns = _get_similar_column_names(schema, unknown_name) + similar_columns = _get_similar_strings(unknown_name, schema.column_names) result += f"\n - '{unknown_name}'" if similar_columns: result += f": Did you mean one of {similar_columns}?" return result - - -def _get_similar_column_names(schema: Schema, name: str) -> list[str]: - from difflib import get_close_matches - - close_matches = get_close_matches(name, schema.column_names, n=3) - - if close_matches and close_matches[0] == name: - return close_matches[0:1] - else: - return close_matches diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py new file mode 100644 index 000000000..0e20ca3fb --- /dev/null +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -0,0 +1,110 @@ +def _convert_and_check_datetime_format( + format: str, + used_for_parsing: bool, +) -> str: + converted_format = "" + index = 0 + + while index < len(format): + char = char_at(format, index) + + # Escaped characters + if char == "\\" and char_at(format, index + 1) == "\\": + converted_format += "\\" + index += 2 + if char == "\\" and char_at(format, index + 1) == "{": + converted_format += "{" + index += 2 + # Characters that need to be escaped for rust's chrono crate + elif char == "\n": + converted_format += "%n" + index += 1 + elif char == "\t": + converted_format += "%t" + index += 1 + elif char == "%": + converted_format += "%%" + index += 1 + # Template expression + 
elif char == "{": + # Find the closing curly brace + closing_brace_index = format.find("}", index) + if closing_brace_index == -1: + raise ValueError(f"Unclosed template expression at index {index}.") + + expression = format[index + 1 : closing_brace_index] + converted_format += _convert_and_check_template_expression(expression, used_for_parsing) + index = closing_brace_index + 1 + # Regular characters + else: + converted_format += char + index += 1 + + return converted_format + + +def char_at(string: str, i: int) -> str | None: + if i >= len(string): + return None + return string[i] + + +def _convert_and_check_template_expression( + expression: str, + used_for_parsing: bool, +) -> str: + converted_expression = expression + + return converted_expression + + +# def _check_format_string(format_string: str) -> bool: +# valid_format_codes = { +# "F": "the standard", +# "a": "abbreviated weekday name", +# "A": "full weekday name", +# "w": "weekday as a decimal number", +# "d": "day of the month as a zero-padded decimal number", +# "b": "abbreviated month name", +# "B": "full month name", +# "m": "month as a zero-padded decimal number", +# "y": "year without century as a zero-padded decimal number", +# "Y": "year with century as a decimal number", +# "H": "hour (24-hour clock) as a zero-padded decimal number", +# "I": "hour (12-hour clock) as a zero-padded decimal number", +# "p": "locale's equivalent of either AM or PM", +# "M": "minute as a zero-padded decimal number", +# "S": "second as a zero-padded decimal number", +# "f": "microsecond as a zero-padded decimal number", +# "z": "UTC offset in the form ±HHMM[SS[.ffffff]]", +# "Z": "time zone name", +# "j": "day of the year as a zero-padded decimal number", +# "U": "week number of the year (Sunday as the first day of the week)", +# "W": "week number of the year (Monday as the first day of the week)", +# "c": "locale's appropriate date and time representation", +# "x": "locale's appropriate date representation", +# "X": 
"locale's appropriate time representation", +# "%": "a literal '%' character", +# } +# +# # Keep track of the positions in the string +# i = 0 +# n = len(format_string) +# +# # Iterate over each character in the format string +# while i < n: +# if format_string[i] == "%": +# # Make sure there's at least one character following the '%' +# if i + 1 < n: +# code = format_string[i + 1] +# # Check if the following character is a valid format code +# if code not in valid_format_codes: +# return False +# i += 2 # Skip ahead past the format code +# else: +# # '%' is at the end of the string with no following format code +# return False +# else: +# i += 1 # Continue to the next character +# +# return True diff --git a/src/safeds/_validation/_normalize_and_check_file_path_module.py b/src/safeds/_validation/_normalize_and_check_file_path_module.py index 2ba054dfd..c5fe8e8c8 100644 --- a/src/safeds/_validation/_normalize_and_check_file_path_module.py +++ b/src/safeds/_validation/_normalize_and_check_file_path_module.py @@ -15,12 +15,12 @@ def _normalize_and_check_file_path( check_if_file_exists: bool = False, ) -> Path: """ - Check whether the provided path is a valid file path and normalize it. + Normalize a path and check its validity. Parameters ---------- path: - Path to check and normalize. + Path to normalize and check. canonical_file_extension: If the path has no extension, this extension will be added. It should include the leading dot. 
valid_file_extensions: diff --git a/tests/safeds/_utils/test_get_similar_strings.py b/tests/safeds/_utils/test_get_similar_strings.py new file mode 100644 index 000000000..3acd29b09 --- /dev/null +++ b/tests/safeds/_utils/test_get_similar_strings.py @@ -0,0 +1,38 @@ +import pytest + +from safeds._utils import _get_similar_strings + + +@pytest.mark.parametrize( + ("string", "valid_strings", "expected"), + [ + ( + "column1", + [], + [], + ), + ( + "column1", + ["column1", "column2"], + ["column1"], + ), + ( + "dissimilar", + ["column1", "column2", "column3"], + [], + ), + ( + "cilumn1", + ["column1", "x", "y"], + ["column1"], + ), + ( + "cilumn1", + ["column1", "column2", "y"], + ["column1", "column2"], + ), + ], + ids=["empty", "exact match", "no similar", "one similar", "multiple similar"], +) +def test_should_get_similar_strings(string: str, valid_strings: list[str], expected: list[str]) -> None: + assert _get_similar_strings(string, valid_strings) == expected diff --git a/tests/safeds/_validation/__init__.py b/tests/safeds/_validation/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/safeds/_validation/test_get_similar_columns.py b/tests/safeds/_validation/test_get_similar_columns.py deleted file mode 100644 index 1d6505a0b..000000000 --- a/tests/safeds/_validation/test_get_similar_columns.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest - -from safeds._validation._check_columns_exist_module import _get_similar_column_names -from safeds.data.tabular.containers import Table - - -@pytest.mark.parametrize( - ("table", "name", "expected"), - [ - ( - Table({}), - "column1", - [], - ), - ( - Table({"column1": [], "column2": []}), - "column1", - ["column1"], - ), - ( - Table({"column1": [], "column2": [], "column3": []}), - "dissimilar", - [], - ), - ( - Table({"column1": [], "x": [], "y": []}), - "cilumn1", - ["column1"], - ), - ( - Table({"column1": [], "column2": [], "y": []}), - "cilumn1", - ["column1", "column2"], - ), - ], - 
ids=["empty table", "exact match", "no similar", "one similar", "multiple similar"], -) -def test_should_get_similar_column_names(table: Table, name: str, expected: list[str]) -> None: - assert _get_similar_column_names(table.schema, name) == expected From dff8a5b2d8aedf74f2bc95c753b759e1fd1691b9 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sat, 18 Jan 2025 18:21:03 +0100 Subject: [PATCH 26/57] feat: custom format strings for datetimes --- ...onvert_and_check_datetime_format_module.py | 182 ++++++++++++------ .../tabular/query/_datetime_operations.py | 123 ++++++------ .../query/_lazy_datetime_operations.py | 64 +----- .../tabular/query/_lazy_string_operations.py | 27 ++- .../data/tabular/query/_string_operations.py | 10 +- .../_lazy_datetime_operations/test_weekday.py | 2 +- 6 files changed, 219 insertions(+), 189 deletions(-) diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index 0e20ca3fb..d68ad7c96 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -1,7 +1,88 @@ +from __future__ import annotations + +from typing import Literal + +from safeds._utils import _get_similar_strings + +_DATE_REPLACEMENTS = { + # Year + "Y": "Y", + "_Y": "_Y", + "^Y": "-Y", + "Y99": "y", + "_Y99": "_y", + "^Y99": "-y", + # Month + "M": "m", + "_M": "_m", + "^M": "-m", + "M-full": "B", + "M-abbr": "b", + # Week + "W": "V", + "_W": "_V", + "^W": "-V", + # Day + "D": "d", + "_D": "_d", + "^D": "-d", + "DOW": "u", + "DOW-full": "A", + "DOW-abbr": "a", + "DOY": "j", +} + +_TIME_REPLACEMENTS = { + # Hour + "h": "H", + "_h": "_H", + "^h": "-H", + "h12": "I", + "_h12": "_I", + "^h12": "-I", + # Minute + "m": "M", + "_m": "_M", + "^m": "-M", + # Second + "s": "S", + "_s": "_S", + "^s": "-S", + # Fractional seconds + ".f": ".f", + "ms": "3f", + "us": "6f", + "ns": "9f", + # AM/PM + 
"AM/PM": "p", + "am/pm": "P", +} + +_DATETIME_REPLACEMENTS = { + # Date and time replacements are also valid for datetime + **_DATE_REPLACEMENTS, + **_TIME_REPLACEMENTS, + # Timezone + "z": "z", + ":z": ":z", + # UNIX timestamp + "u": "s", +} + +_DATETIME_REPLACEMENTS_WHEN_PARSING = { + **_DATETIME_REPLACEMENTS, + # Allow omission of minutes for the timezone offset + "z": "#z", + ":z": "#z", +} + + def _convert_and_check_datetime_format( format: str, + type_: Literal["datetime", "date", "time"], used_for_parsing: bool, ) -> str: + replacements = _get_replacements(type_, used_for_parsing) converted_format = "" index = 0 @@ -27,14 +108,13 @@ def _convert_and_check_datetime_format( index += 1 # Template expression elif char == "{": - # Find the closing curly brace - closing_brace_index = format.find("}", index) - if closing_brace_index == -1: + end_index = format.find("}", index) + if end_index == -1: raise ValueError(f"Unclosed template expression at index {index}.") - expression = format[index + 1 : closing_brace_index] - converted_format += _convert_and_check_template_expression(expression, used_for_parsing) - index = closing_brace_index + 1 + expression = format[index + 1 : end_index] + converted_format += _convert_and_check_template_expression(expression, type_, replacements) + index = end_index + 1 # Regular characters else: converted_format += char @@ -43,6 +123,18 @@ def _convert_and_check_datetime_format( return converted_format +def _get_replacements( + type_: Literal["datetime", "date", "time"], + used_for_parsing: bool, +) -> dict[str, str]: + if type_ == "datetime": + return _DATETIME_REPLACEMENTS_WHEN_PARSING if used_for_parsing else _DATETIME_REPLACEMENTS + elif type_ == "date": + return _DATE_REPLACEMENTS + else: + return _TIME_REPLACEMENTS + + def char_at(string: str, i: int) -> str | None: if i >= len(string): return None @@ -51,60 +143,26 @@ def char_at(string: str, i: int) -> str | None: def _convert_and_check_template_expression( expression: str, 
- used_for_parsing: bool, + type_: str, + replacements: dict[str, str], ) -> str: - converted_expression = expression - - return converted_expression - - -# def _check_format_string(format_string: str) -> bool: -# valid_format_codes = { -# "F": "the standard", -# "a": "abbreviated weekday name", -# "A": "full weekday name", -# "w": "weekday as a decimal number", -# "d": "day of the month as a zero-padded decimal number", -# "b": "abbreviated month name", -# "B": "full month name", -# "m": "month as a zero-padded decimal number", -# "y": "year without century as a zero-padded decimal number", -# "Y": "year with century as a decimal number", -# "H": "hour (24-hour clock) as a zero-padded decimal number", -# "I": "hour (12-hour clock) as a zero-padded decimal number", -# "p": "locale's equivalent of either AM or PM", -# "M": "minute as a zero-padded decimal number", -# "S": "second as a zero-padded decimal number", -# "f": "microsecond as a zero-padded decimal number", -# "z": "UTC offset in the form ±HHMM[SS[.ffffff]]", -# "Z": "time zone name", -# "j": "day of the year as a zero-padded decimal number", -# "U": "week number of the year (Sunday as the first day of the week)", -# "W": "week number of the year (Monday as the first day of the week)", -# "c": "locale's appropriate date and time representation", -# "x": "locale's appropriate date representation", -# "X": "locale's appropriate time representation", -# "%": "a literal '%' character", -# } -# -# # Keep track of the positions in the string -# i = 0 -# n = len(format_string) -# -# # Iterate over each character in the format string -# while i < n: -# if format_string[i] == "%": -# # Make sure there's at least one character following the '%' -# if i + 1 < n: -# code = format_string[i + 1] -# # Check if the following character is a valid format code -# if code not in valid_format_codes: -# return False -# i += 2 # Skip ahead past the format code -# else: -# # '%' is at the end of the string with no following 
format code -# return False -# else: -# i += 1 # Continue to the next character -# -# return True + if expression in replacements: + return "%" + replacements[expression] + + # Unknown template expression + message = _build_error_message(expression, type_, list(replacements.keys())) + raise ValueError(message) + + +def _build_error_message( + expression: str, + type_: str, + valid_expressions: list[str], +) -> str: + result = f"Invalid template expression '{expression}' for type {type_}." + + similar_expressions = _get_similar_strings(expression, valid_expressions) + if similar_expressions: + result += f" Did you mean one of {similar_expressions}?" + + return result diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 7f0bf8716..eac241859 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -165,6 +165,47 @@ def day(self) -> Cell[int | None]: +------+ """ + @abstractmethod + def day_of_week(self) -> Cell[int | None]: + """ + Extract the day of the week from a datetime or date as defined by ISO 8601. + + The day of the week is a number between 1 (Monday) and 7 (Sunday). + + Returns + ------- + cell: + The day of the week. 
+ + Examples + -------- + >>> from datetime import datetime, date + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1), datetime(2000, 1, 2), None]) + >>> column1.transform(lambda cell: cell.dt.day_of_week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ + + >>> column2 = Column("a", [date(2000, 1, 1), date(2000, 1, 2), None]) + >>> column2.transform(lambda cell: cell.dt.day_of_week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ + """ + @abstractmethod def day_of_year(self) -> Cell[int | None]: """ @@ -564,10 +605,11 @@ def time(self) -> Cell[python_datetime.time | None]: @abstractmethod def week(self) -> Cell[int | None]: """ - Extract the week from a datetime or date. + Extract the ISO 8601 week number from a datetime or date. - The week is a number between 1 and 53. The first week of a year starts at the first Monday of the year. - Subsequent weeks start on Monday and end on Sunday. + The week is a number between 1 and 53. The first week of a year is the week that contains the first Thursday of + the year. The last week of a year is the week that contains the last Thursday of the year. In other words, a + week is associated with a year if it contains the majority of its days. 
Returns ------- @@ -578,7 +620,7 @@ def week(self) -> Cell[int | None]: -------- >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 2), datetime(2000, 1, 3), None]) + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 2), datetime(2001, 12, 31), None]) >>> column1.transform(lambda cell: cell.dt.week()) +------+ | a | @@ -591,7 +633,7 @@ def week(self) -> Cell[int | None]: | null | +------+ - >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 2), date(2000, 1, 4), None]) + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 2), datetime(2001, 12, 31), None]) >>> column2.transform(lambda cell: cell.dt.week()) +------+ | a | @@ -605,47 +647,6 @@ def week(self) -> Cell[int | None]: +------+ """ - @abstractmethod - def weekday(self) -> Cell[int | None]: - """ - Extract the weekday from a datetime or date. - - The weekday is a number between 1 (Monday) and 7 (Sunday). - - Returns - ------- - cell: - The weekday. - - Examples - -------- - >>> from datetime import datetime, date - >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("a", [datetime(2000, 1, 1), datetime(2000, 1, 2), None]) - >>> column1.transform(lambda cell: cell.dt.weekday()) - +------+ - | a | - | --- | - | i8 | - +======+ - | 6 | - | 7 | - | null | - +------+ - - >>> column2 = Column("a", [date(2000, 1, 1), date(2000, 1, 2), None]) - >>> column2.transform(lambda cell: cell.dt.weekday()) - +------+ - | a | - | --- | - | i8 | - +======+ - | 6 | - | 7 | - | null | - +------+ - """ - @abstractmethod def year(self) -> Cell[int | None]: """ @@ -835,31 +836,31 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: """ @abstractmethod - def unix_time(self) -> Cell[int | None]: + def unix_timestamp(self) -> Cell[int | None]: """ - Get the Unix time from a datetime. + Get the Unix timestamp from a datetime. 
- The Unix time is the elapsed time since 00:00:00 UTC on 1 January 1970. This method returns the Unix time in - microseconds. + A Unix timestamp is the elapsed time since 00:00:00 UTC on 1 January 1970. This method returns the value in + seconds. Returns ------- cell: - The Unix time. + The Unix timestamp. Examples -------- >>> from datetime import datetime >>> from safeds.data.tabular.containers import Column >>> column = Column("a", [datetime(1970, 1, 1), datetime(1970, 1, 2), None]) - >>> column.transform(lambda cell: cell.dt.unix_time()) - +-------------+ - | a | - | --- | - | i64 | - +=============+ - | 0 | - | 86400000000 | - | null | - +-------------+ + >>> column.transform(lambda cell: cell.dt.unix_timestamp()) + +-------+ + | a | + | --- | + | i64 | + +=======+ + | 0 | + | 86400 | + | null | + +-------+ """ diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index bbdb6f661..062c1fd90 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds._validation import _convert_and_check_datetime_format from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._datetime_operations import DatetimeOperations @@ -89,7 +90,7 @@ def time(self) -> Cell[python_datetime.time | None]: def week(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.week()) - def weekday(self) -> Cell[int | None]: + def day_of_week(self) -> Cell[int | None]: return _LazyCell(self._expression.dt.weekday()) def year(self) -> Cell[int | None]: @@ -128,61 +129,10 @@ def replace( def to_string(self, *, format: str = "iso") -> Cell[str | None]: if format == "iso": format = "iso:strict" # noqa: A001 - elif not _check_format_string(format): - raise ValueError("Invalid format string") - return 
_LazyCell(self._expression.dt.to_string(format=format)) - - def unix_time(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.epoch()) - - -def _check_format_string(format_string: str) -> bool: - valid_format_codes = { - "F": "the standard", - "a": "abbreviated weekday name", - "A": "full weekday name", - "w": "weekday as a decimal number", - "d": "day of the month as a zero-padded decimal number", - "b": "abbreviated month name", - "B": "full month name", - "m": "month as a zero-padded decimal number", - "y": "year without century as a zero-padded decimal number", - "Y": "year with century as a decimal number", - "H": "hour (24-hour clock) as a zero-padded decimal number", - "I": "hour (12-hour clock) as a zero-padded decimal number", - "p": "locale's equivalent of either AM or PM", - "M": "minute as a zero-padded decimal number", - "S": "second as a zero-padded decimal number", - "f": "microsecond as a zero-padded decimal number", - "z": "UTC offset in the form ±HHMM[SS[.ffffff]]", - "Z": "time zone name", - "j": "day of the year as a zero-padded decimal number", - "U": "week number of the year (Sunday as the first day of the week)", - "W": "week number of the year (Monday as the first day of the week)", - "c": "locale's appropriate date and time representation", - "x": "locale's appropriate date representation", - "X": "locale's appropriate time representation", - "%": "a literal '%' character", - } - - # Keep track of the positions in the string - i = 0 - n = len(format_string) - - # Iterate over each character in the format string - while i < n: - if format_string[i] == "%": - # Make sure there's at least one character following the '%' - if i + 1 < n: - code = format_string[i + 1] - # Check if the following character is a valid format code - if code not in valid_format_codes: - return False - i += 2 # Skip ahead past the format code - else: - # '%' is at the end of the string with no following format code - return False else: - i += 1 # Continue 
to the next character + format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=False) # noqa: A001 + + return _LazyCell(self._expression.dt.to_string(format=format)) - return True + def unix_timestamp(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.epoch(time_unit="s")) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index 5e6f751ae..d080b913e 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash +from safeds._validation import _convert_and_check_datetime_format from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations @@ -76,11 +77,21 @@ def substring( ) -> Cell[str | None]: return _LazyCell(self._expression.str.slice(start, length)) - def to_date(self) -> Cell[datetime.date | None]: - return _LazyCell(self._expression.str.to_date(format="%F", strict=False)) + def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: + if format == "iso": + format = "%F" # noqa: A001 + else: + format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_date(format=format, strict=False)) + + def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: + if format == "iso": + format = "%+" # noqa: A001 + else: + format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) # noqa: A001 - def to_datetime(self) -> Cell[datetime.datetime | None]: - return _LazyCell(self._expression.str.to_datetime(format="%+", strict=False)) + return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | 
None]: return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) @@ -88,6 +99,14 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) + def to_time(self, *, format: str | None = None) -> Cell[datetime.time | None]: + if format == "iso": + format = "%T" # noqa: A001 + else: + format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_time(format=format, strict=False)) + def to_uppercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_uppercase()) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index ee42113d3..90e58321f 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -303,9 +303,9 @@ def substring( # TODO: add format parameter @abstractmethod - def to_date(self) -> Cell[datetime.date | None]: + def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: """ - Convert the string value in the cell to a date. Requires the string to be in the ISO 8601 format. + Convert the string value in the cell to a date. Returns ------- @@ -331,7 +331,7 @@ def to_date(self) -> Cell[datetime.date | None]: # TODO: add format parameter @abstractmethod - def to_datetime(self) -> Cell[datetime.datetime | None]: + def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: """ Convert the string value in the cell to a datetime. Requires the string to be in the ISO 8601 format. 
@@ -343,7 +343,7 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc", None]) + >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) >>> column.transform(lambda cell: cell.str.to_datetime()) +-------------------------+ | a | @@ -357,6 +357,8 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: +-------------------------+ """ + # TODO: add to_time + @abstractmethod def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: """ diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py index 9db08b4fc..9c62907a6 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py @@ -17,4 +17,4 @@ ], ) def test_get_weekday(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.weekday(), expected) + assert_cell_operation_works(input_date, lambda cell: cell.dt.day_of_week(), expected) From 88bae5f3cd0e054fa916fd3723c66afc68374522 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sat, 18 Jan 2025 20:07:56 +0100 Subject: [PATCH 27/57] docs: add documentation for `dt.to_string` method --- ...onvert_and_check_datetime_format_module.py | 45 ++++++-- .../tabular/query/_datetime_operations.py | 105 ++++++++++++++++-- .../tabular/query/_lazy_string_operations.py | 6 +- .../data/tabular/query/_string_operations.py | 6 +- .../_lazy_datetime_operations/test_century.py | 20 ---- .../test_date_to_string.py | 45 -------- .../test_datetime_to_string.py | 34 ------ .../_lazy_datetime_operations/test_day.py | 20 ---- .../_lazy_datetime_operations/test_month.py | 20 ---- .../_lazy_datetime_operations/test_week.py 
| 20 ---- .../_lazy_datetime_operations/test_weekday.py | 20 ---- .../_lazy_datetime_operations/test_year.py | 20 ---- 12 files changed, 137 insertions(+), 224 deletions(-) delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py delete mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index d68ad7c96..7bc249772 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -17,7 +17,7 @@ "_M": "_m", "^M": "-m", "M-full": "B", - "M-abbr": "b", + "M-short": "b", # Week "W": "V", "_W": "_V", @@ -28,8 +28,10 @@ "^D": "-d", "DOW": "u", "DOW-full": "A", - "DOW-abbr": "a", + "DOW-short": "a", "DOY": "j", + "_DOY": "_j", + "^DOY": "-j", } _TIME_REPLACEMENTS = { @@ -78,22 +80,44 @@ def _convert_and_check_datetime_format( - format: str, + format_: str, type_: Literal["datetime", "date", "time"], used_for_parsing: bool, ) -> str: + """ + Convert our datetime format string to a format string understood by polars and check for errors. + + Parameters + ---------- + format_: + The datetime format to convert. + type_: + Whether format is for a datetime, date, or time. 
+ used_for_parsing: + Whether the format is used for parsing. + + Returns + ------- + converted_format: + The converted datetime format. + + Raises + ------ + ValueError + If the format is invalid. + """ replacements = _get_replacements(type_, used_for_parsing) converted_format = "" index = 0 - while index < len(format): - char = char_at(format, index) + while index < len(format_): + char = char_at(format_, index) # Escaped characters - if char == "\\" and char_at(format, index + 1) == "\\": + if char == "\\" and char_at(format_, index + 1) == "\\": converted_format += "\\" index += 2 - if char == "\\" and char_at(format, index + 1) == "{": + if char == "\\" and char_at(format_, index + 1) == "{": converted_format += "{" index += 2 # Characters that need to be escaped for rust's chrono crate @@ -108,11 +132,12 @@ def _convert_and_check_datetime_format( index += 1 # Template expression elif char == "{": - end_index = format.find("}", index) + end_index = format_.find("}", index) if end_index == -1: - raise ValueError(f"Unclosed template expression at index {index}.") + message = f"Unclosed template expression at index {index}." 
+ raise ValueError(message) - expression = format[index + 1 : end_index] + expression = format_[index + 1 : end_index] converted_format += _convert_and_check_template_expression(expression, type_, replacements) index = end_index + 1 # Regular characters diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index eac241859..517dd0108 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -771,7 +771,7 @@ def replace( Examples -------- - >>> from datetime import datetime, date, time + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) @@ -796,14 +796,61 @@ def replace( +------------+ """ - # TODO: explain format string + more examples @abstractmethod def to_string(self, *, format: str = "iso") -> Cell[str | None]: - """ + r""" Convert a datetime, date, or time to a string. - The format can be either the special value "iso" to create ISO 8601 strings or a custom format string. The - custom format string can contain the following placeholders: + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed placeholders (see below), which are replaced with the corresponding values. The + placeholders are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following placeholders for _date components_ are available for **datetime** and **date**: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). 
+ - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). + - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). + - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). + - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The following placeholders for _time components_ are available for **datetime** and **time**: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). + - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). + + The following placeholders are available for **datetime** only: + + - `{z}`: Offset of the timezone from UTC without a colon (e.g. "+0000"). 
+ - `{:z}`: Offset of the timezone from UTC with a colon (e.g. "+00:00"). + - `{u}`: The UNIX timestamp in seconds. + + The placeholders follow certain conventions: + + - Generally, date components use uppercase letters and time components use lowercase letters. + - If a component may be formatted in multiple ways, we use shorter placeholders for ISO 8601. Placeholders for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). + - By default, value are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). Parameters ---------- @@ -822,16 +869,56 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: Examples -------- + >>> from datetime import datetime, date >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("a", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.to_string()) + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column1.transform(lambda cell: cell.dt.to_string()) + +----------------------------+ + | a | + | --- | + | str | + +============================+ + | 1999-12-31T00:00:00.000000 | + | 2000-01-01T12:30:00.000000 | + | null | + +----------------------------+ + + >>> column1.transform(lambda cell: cell.dt.to_string( + ... format="{DOW-short} {D}-{M-short}-{Y} {h12}:{m}:{s} {AM/PM}" + ... 
)) + +-----------------------------+ + | a | + | --- | + | str | + +=============================+ + | Fri 31-Dec-1999 12:00:00 AM | + | Sat 01-Jan-2000 12:30:00 PM | + | null | + +-----------------------------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.to_string()) + +------------+ + | a | + | --- | + | str | + +============+ + | 1999-12-31 | + | 2000-01-01 | + | null | + +------------+ + + >>> column2.transform(lambda cell: cell.dt.to_string( + ... format="{M}/{D}/{Y}" + ... )) +------------+ | a | | --- | | str | +============+ - | 2022-01-09 | + | 12/31/1999 | + | 01/01/2000 | + | null | +------------+ """ diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index d080b913e..cb9a2790a 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -80,7 +80,7 @@ def substring( def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: if format == "iso": format = "%F" # noqa: A001 - else: + elif format is not None: format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) # noqa: A001 return _LazyCell(self._expression.str.to_date(format=format, strict=False)) @@ -88,7 +88,7 @@ def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: if format == "iso": format = "%+" # noqa: A001 - else: + elif format is not None: format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=True) # noqa: A001 return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) @@ -102,7 +102,7 @@ def to_lowercase(self) -> Cell[str | None]: def to_time(self, *, format: str | None = None) -> Cell[datetime.time | None]: if format == "iso": format = "%T" # noqa: A001 
- else: + elif format is not None: format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 return _LazyCell(self._expression.str.to_time(format=format, strict=False)) diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 90e58321f..0284d6029 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -301,7 +301,7 @@ def substring( +------+ """ - # TODO: add format parameter + # TODO: add format parameter + document @abstractmethod def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: """ @@ -329,11 +329,11 @@ def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: +------------+ """ - # TODO: add format parameter + # TODO: add format parameter + document @abstractmethod def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: """ - Convert the string value in the cell to a datetime. Requires the string to be in the ISO 8601 format. + Convert the string value in the cell to a datetime. 
Returns ------- diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py deleted file mode 100644 index 2d36808b6..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (18, datetime.datetime(1800, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (21, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_day(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.century(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py deleted file mode 100644 index 7418c167e..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date_to_string.py +++ /dev/null @@ -1,45 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ("2022-01-09", datetime.date(2022, 1, 9), "%F"), - ("2022/01/09", datetime.date(2022, 1, 9), "%Y/%m/%d"), - ], - ids=[ - "ISO date", - "ISO date format", - ], -) -def test_should_parse_date_to_string(input_date: datetime.date, expected: bool, format_string: str) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.date_to_string(format_string), expected) - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ( - "Invalid format string", - datetime.date(2022, 1, 9), - "%9", - ), - ( - "Invalid format string", - datetime.date(2022, 1, 9), - "%Y%", - ), - ], - ids=["ISO datetime false", "ISO datetime false % at 
end"], -) -def test_should_raise_value_error_when_input_date_is_invalid( - input_date: datetime.date, - expected: str, - format_string: str, -) -> None: - with pytest.raises(ValueError, match=expected): - assert_cell_operation_works(input_date, lambda cell: cell.dt.date_to_string(format_string), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py deleted file mode 100644 index 942fef5bc..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_datetime_to_string.py +++ /dev/null @@ -1,34 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ("2022/01/09 23:29:01", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC), "%Y/%m/%d %H:%M:%S"), - ("2022:01:09 23/29/01", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC), "%Y:%m:%d %H/%M/%S"), - ], - ids=[ - "ISO datetime", - "ISO datetime format", - ], -) -def test_should_parse_date_to_string(input_date: datetime.date, expected: bool, format_string: str) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.datetime_to_string(format_string), expected) - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - ("Invalid format string", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - ], - ids=[ - "ISO datetime false", - ], -) -def test_should_raise_value_error_when_input_date_is_invalid(input_date: datetime.date, expected: str) -> None: - with pytest.raises(ValueError, match=expected): - assert_cell_operation_works(input_date, lambda cell: cell.dt.datetime_to_string("%9"), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py deleted file mode 100644 index 
afa9c588b..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (9, datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (1, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_day(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.day(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py deleted file mode 100644 index 626dff546..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (3, datetime.datetime(2022, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (1, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_month(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.month(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py deleted file mode 100644 index 3a6c7fd60..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (10, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (52, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO 
date", - ], -) -def test_get_week(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.week(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py deleted file mode 100644 index 9c62907a6..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_weekday.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (4, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (6, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_weekday(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.day_of_week(), expected) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py deleted file mode 100644 index e35810e52..000000000 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (2023, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (2022, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_year(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.year(), expected) From ef8d2b9cc553fae3a74cb1542398240a97398f94 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:11:00 +0100 Subject: [PATCH 28/57] test: `unix_timestamp` --- .../tabular/query/_datetime_operations.py | 24 ++++++++++-- 
.../query/_lazy_datetime_operations.py | 6 +-- .../test_unix_timestamp.py | 39 +++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 517dd0108..01864e480 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: import datetime as python_datetime @@ -923,12 +923,17 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: """ @abstractmethod - def unix_timestamp(self) -> Cell[int | None]: + def unix_timestamp(self, *, unit: Literal["s", "ms", "us"] = "s") -> Cell[int | None]: """ Get the Unix timestamp from a datetime. - A Unix timestamp is the elapsed time since 00:00:00 UTC on 1 January 1970. This method returns the value in - seconds. + A Unix timestamp is the elapsed time since 00:00:00 UTC on 1 January 1970. By default, this method returns the + value in seconds, but that can be changed with the `unit` parameter. + + Parameters + ---------- + unit: + The unit of the timestamp. Can be "s" (seconds), "ms" (milliseconds), or "us" (microseconds). 
Returns ------- @@ -950,4 +955,15 @@ def unix_timestamp(self) -> Cell[int | None]: | 86400 | | null | +-------+ + + >>> column.transform(lambda cell: cell.dt.unix_timestamp(unit="ms")) + +----------+ + | a | + | --- | + | i64 | + +==========+ + | 0 | + | 86400000 | + | null | + +----------+ """ diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py index 062c1fd90..298c8fdb4 100644 --- a/src/safeds/data/tabular/query/_lazy_datetime_operations.py +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from safeds._utils import _structural_hash from safeds._validation import _convert_and_check_datetime_format @@ -134,5 +134,5 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: return _LazyCell(self._expression.dt.to_string(format=format)) - def unix_timestamp(self) -> Cell[int | None]: - return _LazyCell(self._expression.dt.epoch(time_unit="s")) + def unix_timestamp(self, *, unit: Literal["s", "ms", "us"] = "s") -> Cell[int | None]: + return _LazyCell(self._expression.dt.epoch(time_unit=unit)) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py new file mode 100644 index 000000000..2bab01580 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py @@ -0,0 +1,39 @@ +from datetime import UTC, datetime +from typing import Literal + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "unit", "expected"), + [ + (datetime(1970, 1, 1, tzinfo=UTC), "s", 0), + (datetime(1969, 12, 31, tzinfo=UTC), "s", -86400), + (datetime(1970, 1, 2, tzinfo=UTC), "s", 86400), + 
(datetime(1970, 1, 2, tzinfo=UTC), "ms", 86400000), + (datetime(1970, 1, 2, tzinfo=UTC), "us", 86400000000), + (None, "s", None), + ], + ids=[ + "epoch", + "one day before epoch", + "one day after epoch (seconds)", + "one day after epoch (milliseconds)", + "one day after epoch (microseconds)", + "None", + ], +) +def test_should_return_unix_timestamp( + value: datetime | None, + unit: Literal["s", "ms", "us"], + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.unix_timestamp(unit=unit), + expected, + type_if_none=ColumnType.datetime(), + ) From 8e2f40c0f164cc40b5f8c7bf5a0d8eb3a2bff3fa Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:14:14 +0100 Subject: [PATCH 29/57] test: `is_in_leap_year` --- .../test_is_in_leap_year.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py new file mode 100644 index 000000000..849346dc7 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py @@ -0,0 +1,43 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1999, 1, 1, tzinfo=UTC), False), + (datetime(1996, 1, 1, tzinfo=UTC), True), + (datetime(1900, 1, 1, tzinfo=UTC), False), + (datetime(2000, 1, 1, tzinfo=UTC), True), + (date(1999, 1, 1), False), + (date(1996, 1, 1), True), + (date(1900, 1, 1), False), + (date(2000, 1, 1), True), + (None, None), + ], + ids=[ + "datetime - not divisible by 4", + "datetime - divisible by 4", + "datetime - divisible by 100", + "datetime - divisible by 400", + "date - not 
divisible by 4", + "date - divisible by 4", + "date - divisible by 100", + "date - divisible by 400", + "None", + ], +) +def test_should_return_unix_timestamp( + value: datetime | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.is_in_leap_year(), + expected, + type_if_none=ColumnType.datetime(), + ) From 2ff0e923482907fba1c7b15d156b9fbffdeb3042 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:15:11 +0100 Subject: [PATCH 30/57] test: `year` --- .../_lazy_datetime_operations/test_year.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py new file mode 100644 index 000000000..8a60cf5b2 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1999, 1, 1, tzinfo=UTC), 1999), + (date(1999, 1, 1), 1999), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_return_unix_timestamp( + value: datetime | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.year(), + expected, + type_if_none=ColumnType.datetime(), + ) From 5e3d2b64d5daaf8f2b8e49b2d5fc56b1f45b7210 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:16:50 +0100 Subject: [PATCH 31/57] test: `month` --- .../test_is_in_leap_year.py | 2 +- .../_lazy_datetime_operations/test_month.py | 31 +++++++++++++++++++ .../_lazy_datetime_operations/test_year.py | 2 +- 3 files changed, 33 insertions(+), 2 deletions(-) 
create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py index 849346dc7..39e5bc275 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py @@ -31,7 +31,7 @@ "None", ], ) -def test_should_return_unix_timestamp( +def test_should_check_if_is_in_leap_date( value: datetime | None, expected: int | None, ) -> None: diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py new file mode 100644 index 000000000..4dfb8a9ab --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1999, 3, 1, tzinfo=UTC), 3), + (date(1999, 3, 1), 3), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_return_month( + value: datetime | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.month(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py index 8a60cf5b2..835a23ba4 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py @@ -19,7 +19,7 @@ "None", ], ) -def test_should_return_unix_timestamp( +def test_should_extract_year( 
value: datetime | None, expected: int | None, ) -> None: From d6ddde607d59c5a5d74eed8f26019ce4eec49979 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:20:18 +0100 Subject: [PATCH 32/57] test: `day` --- .../_lazy_datetime_operations/test_day.py | 31 +++++++++++++++++++ .../_lazy_datetime_operations/test_month.py | 6 ++-- .../_lazy_datetime_operations/test_year.py | 4 +-- 3 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py new file mode 100644 index 000000000..17ddc4e11 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), 3), + (date(1, 2, 3), 3), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_extract_day( + value: datetime | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py index 4dfb8a9ab..952457d09 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py @@ -9,8 +9,8 @@ @pytest.mark.parametrize( ("value", "expected"), [ - (datetime(1999, 3, 1, tzinfo=UTC), 3), - (date(1999, 3, 1), 3), + (datetime(1, 2, 3, tzinfo=UTC), 2), + (date(1, 2, 3), 2), (None, None), ], ids=[ @@ -19,7 +19,7 
@@ "None", ], ) -def test_should_return_month( +def test_should_extract_month( value: datetime | None, expected: int | None, ) -> None: diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py index 835a23ba4..19ce7421e 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py @@ -9,8 +9,8 @@ @pytest.mark.parametrize( ("value", "expected"), [ - (datetime(1999, 1, 1, tzinfo=UTC), 1999), - (date(1999, 1, 1), 1999), + (datetime(1, 2, 3, tzinfo=UTC), 1), + (date(1, 2, 3), 1), (None, None), ], ids=[ From 0410538505c9e5038fa31f19b4e0bf01570d2ecc Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:23:09 +0100 Subject: [PATCH 33/57] test: `date` --- .../_lazy_datetime_operations/test_date.py | 29 +++++++++++++++++++ .../_lazy_datetime_operations/test_day.py | 2 +- .../test_is_in_leap_year.py | 2 +- .../_lazy_datetime_operations/test_month.py | 2 +- .../_lazy_datetime_operations/test_year.py | 2 +- 5 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py new file mode 100644 index 000000000..0439b61a7 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py @@ -0,0 +1,29 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), date(1, 2, 3)), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_extract_date( + value: datetime | None, + expected: date | None, +) 
-> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.date(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py index 17ddc4e11..4c53d8e2d 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py @@ -20,7 +20,7 @@ ], ) def test_should_extract_day( - value: datetime | None, + value: datetime | date | None, expected: int | None, ) -> None: assert_cell_operation_works( diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py index 39e5bc275..2401c8174 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py @@ -32,7 +32,7 @@ ], ) def test_should_check_if_is_in_leap_date( - value: datetime | None, + value: datetime | date | None, expected: int | None, ) -> None: assert_cell_operation_works( diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py index 952457d09..6236f4a63 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py @@ -20,7 +20,7 @@ ], ) def test_should_extract_month( - value: datetime | None, + value: datetime | date | None, expected: int | None, ) -> None: assert_cell_operation_works( diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py index 19ce7421e..b7d38395b 100644 --- 
a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py @@ -20,7 +20,7 @@ ], ) def test_should_extract_year( - value: datetime | None, + value: datetime | date | None, expected: int | None, ) -> None: assert_cell_operation_works( From 7d60af460eab03664ef2be61192676f01c020b4b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:43:00 +0100 Subject: [PATCH 34/57] test: `day_of_week` --- .../tabular/query/_datetime_operations.py | 31 ++++++----- .../test_day_of_week.py | 55 +++++++++++++++++++ .../_lazy_datetime_operations/test_time.py | 29 ++++++++++ 3 files changed, 100 insertions(+), 15 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 01864e480..224460f30 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -4,7 +4,8 @@ from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: - import datetime as python_datetime + from datetime import date as python_date + from datetime import time as python_time from safeds._typing import _ConvertibleToIntCell from safeds.data.tabular.containers import Cell @@ -70,7 +71,7 @@ def century(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.century()) @@ -100,7 +101,7 @@ def century(self) -> Cell[int | None]: """ @abstractmethod - def date(self) -> Cell[python_datetime.date | None]: + def date(self) 
-> Cell[python_date | None]: """ Extract the date from a datetime. @@ -138,7 +139,7 @@ def day(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.day()) @@ -179,7 +180,7 @@ def day_of_week(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(2000, 1, 1), datetime(2000, 1, 2), None]) >>> column1.transform(lambda cell: cell.dt.day_of_week()) @@ -220,7 +221,7 @@ def day_of_year(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 12, 31), None]) >>> column1.transform(lambda cell: cell.dt.day_of_year()) @@ -342,7 +343,7 @@ def millennium(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.millennium()) @@ -461,7 +462,7 @@ def month(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.month()) @@ -507,7 +508,7 @@ def quarter(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from 
datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 4, 1), None]) >>> column1.transform(lambda cell: cell.dt.quarter()) @@ -576,7 +577,7 @@ def second(self) -> Cell[int | None]: """ @abstractmethod - def time(self) -> Cell[python_datetime.time | None]: + def time(self) -> Cell[python_time | None]: """ Extract the time from a datetime. @@ -618,7 +619,7 @@ def week(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 2), datetime(2001, 12, 31), None]) >>> column1.transform(lambda cell: cell.dt.week()) @@ -659,7 +660,7 @@ def year(self) -> Cell[int | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.year()) @@ -702,7 +703,7 @@ def is_in_leap_year(self) -> Cell[bool | None]: Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1900, 1, 1), datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.is_in_leap_year()) @@ -771,7 +772,7 @@ def replace( Examples -------- - >>> from datetime import datetime, date + >>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(2000, 1, 1), None]) >>> column1.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) @@ -869,7 +870,7 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: Examples -------- - >>> from datetime import datetime, date + 
>>> from datetime import date, datetime >>> from safeds.data.tabular.containers import Column >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) >>> column1.transform(lambda cell: cell.dt.to_string()) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py new file mode 100644 index 000000000..cbd6470f5 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py @@ -0,0 +1,55 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 1, 2, tzinfo=UTC), 2), + (datetime(1, 1, 3, tzinfo=UTC), 3), + (datetime(1, 1, 4, tzinfo=UTC), 4), + (datetime(1, 1, 5, tzinfo=UTC), 5), + (datetime(1, 1, 6, tzinfo=UTC), 6), + (datetime(1, 1, 7, tzinfo=UTC), 7), + (date(1, 1, 1), 1), + (date(1, 1, 2), 2), + (date(1, 1, 3), 3), + (date(1, 1, 4), 4), + (date(1, 1, 5), 5), + (date(1, 1, 6), 6), + (date(1, 1, 7), 7), + (None, None), + ], + ids=[ + "datetime - Monday", + "datetime - Tuesday", + "datetime - Wednesday", + "datetime - Thursday", + "datetime - Friday", + "datetime - Saturday", + "datetime - Sunday", + "date - Monday", + "date - Tuesday", + "date - Wednesday", + "date - Thursday", + "date - Friday", + "date - Saturday", + "date - Sunday", + "None", + ], +) +def test_should_extract_day_of_week( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day_of_week(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py new file mode 100644 
index 000000000..c58d5321d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py @@ -0,0 +1,29 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), time(4, 5, 6, 7)), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_extract_time( + value: datetime | None, + expected: time | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.time(), + expected, + type_if_none=ColumnType.datetime(), + ) From 7db62993a022f855837a09dc4919f5cc519f640b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:44:45 +0100 Subject: [PATCH 35/57] test: `day_of_year` --- .../test_day_of_year.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py new file mode 100644 index 000000000..eac185e1b --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py @@ -0,0 +1,39 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 12, 31, tzinfo=UTC), 365), + (datetime(4, 12, 31, tzinfo=UTC), 366), + (date(1, 1, 1), 1), + (date(1, 12, 31), 365), + (date(4, 12, 31), 366), + (None, None), + ], + ids=[ + "datetime - first", + "datetime - last in non-leap year", + "datetime - last in leap year", + "date - first", + "date - last in non-leap year", + 
"date - last in leap year", + "None", + ], +) +def test_should_extract_day_of_year( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day_of_year(), + expected, + type_if_none=ColumnType.datetime(), + ) From 9952ae58cdcf3ab28ca335baebe0fd2eeaaa240b Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:50:32 +0100 Subject: [PATCH 36/57] test: `century` --- .../_lazy_datetime_operations/test_century.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py new file mode 100644 index 000000000..3a32f1f6e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py @@ -0,0 +1,39 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(100, 12, 31, tzinfo=UTC), 1), + (datetime(101, 1, 1, tzinfo=UTC), 2), + (date(1, 1, 1), 1), + (date(100, 12, 31), 1), + (date(101, 1, 1), 2), + (None, None), + ], + ids=[ + "datetime - first day of first century", + "datetime - last day of first century", + "datetime - first day of second century", + "date - first day of first century", + "date - last day of first century", + "date - first day of second century", + "None", + ], +) +def test_should_extract_century( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.century(), + expected, + type_if_none=ColumnType.datetime(), + ) From 8152da205d2b3ea837fa69e6ae12f5918b3ec0fd Mon Sep 17 00:00:00 2001 From: Lars 
Reimann Date: Sun, 19 Jan 2025 11:51:11 +0100 Subject: [PATCH 37/57] test: `millennium` --- .../test_millennium.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py new file mode 100644 index 000000000..c7f790f10 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py @@ -0,0 +1,39 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1000, 12, 31, tzinfo=UTC), 1), + (datetime(1001, 1, 1, tzinfo=UTC), 2), + (date(1, 1, 1), 1), + (date(1000, 12, 31), 1), + (date(1001, 1, 1), 2), + (None, None), + ], + ids=[ + "datetime - first day of first millennium", + "datetime - last day of first millennium", + "datetime - first day of second millennium", + "date - first day of first millennium", + "date - last day of first millennium", + "date - first day of second millennium", + "None", + ], +) +def test_should_extract_millennium( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.millennium(), + expected, + type_if_none=ColumnType.datetime(), + ) From d3b49c2dfb72fcba5fd1c788d857fefc478f3e76 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 11:53:25 +0100 Subject: [PATCH 38/57] test: `quarter` --- .../_lazy_datetime_operations/test_quarter.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py diff --git 
a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py new file mode 100644 index 000000000..af5dce5ac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py @@ -0,0 +1,59 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 3, 31, tzinfo=UTC), 1), + (datetime(1, 4, 1, tzinfo=UTC), 2), + (datetime(1, 6, 30, tzinfo=UTC), 2), + (datetime(1, 7, 1, tzinfo=UTC), 3), + (datetime(1, 9, 30, tzinfo=UTC), 3), + (datetime(1, 10, 1, tzinfo=UTC), 4), + (datetime(1, 12, 31, tzinfo=UTC), 4), + (date(1, 1, 1), 1), + (date(1, 3, 31), 1), + (date(1, 4, 1), 2), + (date(1, 6, 30), 2), + (date(1, 7, 1), 3), + (date(1, 9, 30), 3), + (date(1, 10, 1), 4), + (date(1, 12, 31), 4), + (None, None), + ], + ids=[ + "datetime - first day of first quarter", + "datetime - last day of first quarter", + "datetime - first day of second quarter", + "datetime - last day of second quarter", + "datetime - first day of third quarter", + "datetime - last day of third quarter", + "datetime - first day of fourth quarter", + "datetime - last day of fourth quarter", + "date - first day of first quarter", + "date - last day of first quarter", + "date - first day of second quarter", + "date - last day of second quarter", + "date - first day of third quarter", + "date - last day of third quarter", + "date - first day of fourth quarter", + "date - last day of fourth quarter", + "None", + ], +) +def test_should_extract_quarter( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.quarter(), + expected, + type_if_none=ColumnType.datetime(), + ) From 
c260e01df74230f769c57cd8acc60ad31723028c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:09:24 +0100 Subject: [PATCH 39/57] test: `week` --- .../_lazy_datetime_operations/test_week.py | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py new file mode 100644 index 000000000..bee495dec --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py @@ -0,0 +1,115 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + # datetime - first day is Monday + (datetime(2024, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Tuesday + (datetime(2030, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Wednesday + (datetime(2025, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Thursday + (datetime(2026, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Friday + (datetime(2027, 1, 1, tzinfo=UTC), 53), + # datetime - first day is Saturday + (datetime(2028, 1, 1, tzinfo=UTC), 52), + # datetime - first day is Sunday + (datetime(2023, 1, 1, tzinfo=UTC), 52), + # datetime - last day is Monday + (datetime(2029, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Tuesday + (datetime(2024, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Wednesday + (datetime(2025, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Thursday + (datetime(2026, 12, 31, tzinfo=UTC), 53), + # datetime - last day is Friday + (datetime(2027, 12, 31, tzinfo=UTC), 52), + # datetime - last day is Saturday + (datetime(2022, 12, 31, tzinfo=UTC), 52), + # datetime - last day is Sunday + (datetime(2023, 12, 31, tzinfo=UTC), 52), + # date 
- first day is Monday + (date(2024, 1, 1), 1), + # date - first day is Tuesday + (date(2030, 1, 1), 1), + # date - first day is Wednesday + (date(2025, 1, 1), 1), + # date - first day is Thursday + (date(2026, 1, 1), 1), + # date - first day is Friday + (date(2027, 1, 1), 53), + # date - first day is Saturday + (date(2028, 1, 1), 52), + # date - first day is Sunday + (date(2023, 1, 1), 52), + # date - last day is Monday + (date(2029, 12, 31), 1), + # date - last day is Tuesday + (date(2024, 12, 31), 1), + # date - last day is Wednesday + (date(2025, 12, 31), 1), + # date - last day is Thursday + (date(2026, 12, 31), 53), + # date - last day is Friday + (date(2027, 12, 31), 52), + # date - last day is Saturday + (date(2022, 12, 31), 52), + # date - last day is Sunday + (date(2023, 12, 31), 52), + # None + (None, None), + ], + ids=[ + # datetime + "datetime - first day is Monday", + "datetime - first day is Tuesday", + "datetime - first day is Wednesday", + "datetime - first day is Thursday", + "datetime - first day is Friday", + "datetime - first day is Saturday", + "datetime - first day is Sunday", + "datetime - last day is Monday", + "datetime - last day is Tuesday", + "datetime - last day is Wednesday", + "datetime - last day is Thursday", + "datetime - last day is Friday", + "datetime - last day is Saturday", + "datetime - last day is Sunday", + # date + "date - first day is Monday", + "date - first day is Tuesday", + "date - first day is Wednesday", + "date - first day is Thursday", + "date - first day is Friday", + "date - first day is Saturday", + "date - first day is Sunday", + "date - last day is Monday", + "date - last day is Tuesday", + "date - last day is Wednesday", + "date - last day is Thursday", + "date - last day is Friday", + "date - last day is Saturday", + "date - last day is Sunday", + # None + "None", + ], +) +def test_should_extract_week( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + 
value, + lambda cell: cell.dt.week(), + expected, + type_if_none=ColumnType.datetime(), + ) From bdbbcd5554318ccae5f5ce6a7553d05f324378f2 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:10:49 +0100 Subject: [PATCH 40/57] test: `second` --- .../_lazy_datetime_operations/test_second.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py new file mode 100644 index 000000000..d0ee3c0c3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 6), + (time(4, 5, 6, 7), 6), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_second( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.second(), + expected, + type_if_none=ColumnType.datetime(), + ) From 13850930f5a621fbdb40c61bd7842ee4a9e6f60f Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:11:19 +0100 Subject: [PATCH 41/57] test: `minute` --- .../_lazy_datetime_operations/test_minute.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py new file mode 100644 index 000000000..b3bf1de5f --- /dev/null +++ 
b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 5), + (time(4, 5, 6, 7), 5), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_minute( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.minute(), + expected, + type_if_none=ColumnType.datetime(), + ) From fe9c868dc47d6e1e7f291444b1e3eb8006adac7e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:12:49 +0100 Subject: [PATCH 42/57] test: `hour` --- .../_lazy_datetime_operations/test_hour.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py new file mode 100644 index 000000000..089c8ddc1 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 4), + (time(4, 5, 6, 7), 4), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_hour( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.hour(), + expected, + type_if_none=ColumnType.datetime(), + ) From 
c29f7a4af20fd9245b1b3f07001969db0f05851c Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:14:53 +0100 Subject: [PATCH 43/57] test: `microsecond` --- .../test_microsecond.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py new file mode 100644 index 000000000..3cb03fcac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 7), + (time(4, 5, 6, 7), 7), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_microsecond( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.microsecond(), + expected, + type_if_none=ColumnType.datetime(), + ) From 28d78ff08d13a1da1a43585a3d981960ca432135 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:17:58 +0100 Subject: [PATCH 44/57] test: `millisecond` --- .../test_millisecond.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py new file mode 100644 index 000000000..134211ee4 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py @@ -0,0 +1,35 @@ +from datetime 
import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7000, tzinfo=UTC), 7), + (datetime(1, 2, 3, 4, 5, 6, 999, tzinfo=UTC), 0), + (time(4, 5, 6, 7000), 7), + (time(4, 5, 6, 999), 0), + (None, None), + ], + ids=[ + "datetime - with milliseconds", + "datetime - without full milliseconds", + "time - with milliseconds", + "time - without full milliseconds", + "None", + ], +) +def test_should_extract_millisecond( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.millisecond(), + expected, + type_if_none=ColumnType.datetime(), + ) From e29c00a6d4ce2f7519dd5fb8560adbc798e1c9f3 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 12:27:34 +0100 Subject: [PATCH 45/57] test: `replace` --- .../_lazy_datetime_operations/test_replace.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py new file mode 100644 index 000000000..a201398e0 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py @@ -0,0 +1,127 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +DATETIME = datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) +DATE = date(1, 2, 3) + + +@pytest.mark.parametrize( + ("value", "year", "month", "day", "hour", "minute", "second", "microsecond", "expected"), + [ + # datetime - change year + (DATETIME, 10, None, None, None, None, None, None, datetime(10, 2, 3, 4, 5, 6, 7, tzinfo=UTC)), + # 
datetime - change month (valid) + (DATETIME, None, 10, None, None, None, None, None, datetime(1, 10, 3, 4, 5, 6, 7, tzinfo=UTC)), + # datetime - change month (invalid) + (DATETIME, None, 13, None, None, None, None, None, None), + # datetime - change day (valid) + (DATETIME, None, None, 10, None, None, None, None, datetime(1, 2, 10, 4, 5, 6, 7, tzinfo=UTC)), + # datetime - change day (invalid) + (DATETIME, None, None, 32, None, None, None, None, None), + # datetime - change hour (valid) + (DATETIME, None, None, None, 10, None, None, None, datetime(1, 2, 3, 10, 5, 6, 7, tzinfo=UTC)), + # datetime - change hour (invalid) + (DATETIME, None, None, None, 24, None, None, None, None), + # datetime - change minute (valid) + (DATETIME, None, None, None, None, 10, None, None, datetime(1, 2, 3, 4, 10, 6, 7, tzinfo=UTC)), + # datetime - change minute (invalid) + (DATETIME, None, None, None, None, 60, None, None, None), + # datetime - change second (valid) + (DATETIME, None, None, None, None, None, 10, None, datetime(1, 2, 3, 4, 5, 10, 7, tzinfo=UTC)), + # datetime - change second (invalid) + (DATETIME, None, None, None, None, None, 60, None, None), + # datetime - change microsecond (valid) + (DATETIME, None, None, None, None, None, None, 10, datetime(1, 2, 3, 4, 5, 6, 10, tzinfo=UTC)), + # datetime - change microsecond (invalid) + (DATETIME, None, None, None, None, None, None, 1000000, None), + # date - change year + (DATE, 10, None, None, None, None, None, None, date(10, 2, 3)), + # date - change month (valid) + (DATE, None, 10, None, None, None, None, None, date(1, 10, 3)), + # date - change month (invalid) + (DATE, None, 13, None, None, None, None, None, None), + # date - change day (valid) + (DATE, None, None, 10, None, None, None, None, date(1, 2, 10)), + # date - change day (invalid) + (DATE, None, None, 32, None, None, None, None, None), + # date - change hour (valid) + (DATE, None, None, None, 10, None, None, None, DATE), + # date - change hour (invalid) + (DATE, None, 
None, None, 24, None, None, None, DATE), + # date - change minute (valid) + (DATE, None, None, None, None, 10, None, None, DATE), + # date - change minute (invalid) + (DATE, None, None, None, None, 60, None, None, DATE), + # date - change second (valid) + (DATE, None, None, None, None, None, 10, None, DATE), + # date - change second (invalid) + (DATE, None, None, None, None, None, 60, None, DATE), + # date - change microsecond (valid) + (DATE, None, None, None, None, None, None, 10, DATE), + # date - change microsecond (invalid) + (DATE, None, None, None, None, None, None, 1000000, DATE), + # None + (None, None, None, None, None, None, None, None, None), + ], + ids=[ + # datetime + "datetime - change year", + "datetime - change month (valid)", + "datetime - change month (invalid)", + "datetime - change day (valid)", + "datetime - change day (invalid)", + "datetime - change hour (valid)", + "datetime - change hour (invalid)", + "datetime - change minute (valid)", + "datetime - change minute (invalid)", + "datetime - change second (valid)", + "datetime - change second (invalid)", + "datetime - change microsecond (valid)", + "datetime - change microsecond (invalid)", + # date + "date - change year", + "date - change month (valid)", + "date - change month (invalid)", + "date - change day (valid)", + "date - change day (invalid)", + "date - change hour (valid)", + "date - change hour (invalid)", + "date - change minute (valid)", + "date - change minute (invalid)", + "date - change second (valid)", + "date - change second (invalid)", + "date - change microsecond (valid)", + "date - change microsecond (invalid)", + # None + "None", + ], +) +def test_should_replace_components( + value: datetime | date | None, + year: int | None, + month: int | None, + day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + 
year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + expected, + type_if_none=ColumnType.datetime(), + ) From de11c55a6094ccb156e503f2e8ac9783abf3df07 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 13:00:03 +0100 Subject: [PATCH 46/57] test: first tests for `to_string` --- ...onvert_and_check_datetime_format_module.py | 6 +- .../tabular/query/_datetime_operations.py | 14 +-- .../test_to_string.py | 105 ++++++++++++++++++ 3 files changed, 115 insertions(+), 10 deletions(-) create mode 100644 tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index 7bc249772..002743737 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -134,7 +134,7 @@ def _convert_and_check_datetime_format( elif char == "{": end_index = format_.find("}", index) if end_index == -1: - message = f"Unclosed template expression at index {index}." + message = f"Unclosed specifier at index {index}." raise ValueError(message) expression = format_[index + 1 : end_index] @@ -174,7 +174,7 @@ def _convert_and_check_template_expression( if expression in replacements: return "%" + replacements[expression] - # Unknown template expression + # Unknown specifier message = _build_error_message(expression, type_, list(replacements.keys())) raise ValueError(message) @@ -184,7 +184,7 @@ def _build_error_message( type_: str, valid_expressions: list[str], ) -> str: - result = f"Invalid template expression '{expression}' for type {type_}." + result = f"Invalid specifier '{expression}' for type {type_}." 
similar_expressions = _get_similar_strings(expression, valid_expressions) if similar_expressions: diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py index 224460f30..7e9546b8f 100644 --- a/src/safeds/data/tabular/query/_datetime_operations.py +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -803,11 +803,11 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: Convert a datetime, date, or time to a string. The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The - custom string can contain fixed placeholders (see below), which are replaced with the corresponding values. The - placeholders are case-sensitive and always enclosed in curly braces. Other text is included in the output + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. - The following placeholders for _date components_ are available for **datetime** and **date**: + The following specifiers for _date components_ are available for **datetime** and **date**: - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no @@ -824,7 +824,7 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded to three digits, no padding). 
- The following placeholders for _time components_ are available for **datetime** and **time**: + The following specifiers for _time components_ are available for **datetime** and **time**: - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no @@ -838,16 +838,16 @@ def to_string(self, *, format: str = "iso") -> Cell[str | None]: - `{AM/PM}`: AM or PM (uppercase). - `{am/pm}`: am or pm (lowercase). - The following placeholders are available for **datetime** only: + The following specifiers are available for **datetime** only: - `{z}`: Offset of the timezone from UTC without a colon (e.g. "+0000"). - `{:z}`: Offset of the timezone from UTC with a colon (e.g. "+00:00"). - `{u}`: The UNIX timestamp in seconds. - The placeholders follow certain conventions: + The specifiers follow certain conventions: - Generally, date components use uppercase letters and time components use lowercase letters. - - If a component may be formatted in multiple ways, we use shorter placeholders for ISO 8601. Placeholders for + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for other formats have a prefix (same value with different padding, see below) or suffix (other differences). - By default, value are zero-padded, where applicable. - A leading underscore (`_`) means the value is space-padded. 
diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py new file mode 100644 index 000000000..f4d7e3ce3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -0,0 +1,105 @@ +from datetime import UTC, date, datetime, time + +import pytest +from helpers import assert_cell_operation_works +from polars.polars import ComputeError, PanicException + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType + +DATETIME = datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) +DATE = date(1, 2, 3) +TIME = time(4, 5, 6, 7) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7), "iso", "0001-02-03T04:05:06.000007"), # noqa: DTZ001 + (DATETIME, "iso", "0001-02-03T04:05:06.000007+00:00"), + (None, "iso", None), + ], + ids=[ + "iso, no time zone", + "iso, with time zone", + "None", + ], +) +def test_should_stringify_datetime(value: datetime | None, format_: str, expected: str | None): + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + type_if_none=ColumnType.datetime(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + (DATE, "iso", "0001-02-03"), + (None, "iso", None), + ], + ids=[ + "iso", + "None", + ], +) +def test_should_stringify_date(value: date | None, format_: str, expected: str | None): + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + type_if_none=ColumnType.date(), + ) + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + (TIME, "iso", "04:05:06.000007"), + (None, "iso", None), + ], + ids=[ + "iso", + "None", + ], +) +def test_should_stringify_time(value: time | None, format_: str, expected: str | None): + assert_cell_operation_works( + value, + lambda cell: 
cell.dt.to_string(format=format_), + expected, + type_if_none=ColumnType.time(), + ) + + +def test_should_raise_for_unclosed_specifier(): + column = Column("a", [DATETIME]) + with pytest.raises(ValueError, match="Unclosed specifier"): + column.transform(lambda cell: cell.dt.to_string(format="{Y")) + + +def test_should_raise_for_globally_invalid_specifier(): + column = Column("a", [DATETIME]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.dt.to_string(format="{invalid}")) + + +@pytest.mark.parametrize( + ("value", "format_"), + [ + (DATE, "{h}"), + (TIME, "{Y}"), + ], + ids=[ + "invalid for date", + "invalid for time", + ], +) +def test_should_raise_for_specifier_that_is_invalid_for_type(value: date | time | None, format_: str): + # TODO: This is not the ideal behavior. Once https://github.com/Safe-DS/Library/issues/860 is resolved, we should + # do our own validation to raise an error that knows our own specifiers. + column = Column("a", [value]) + with pytest.raises((ComputeError, PanicException)): + column.transform(lambda cell: cell.dt.to_string(format=format_)) From 1bda757a02209407fe9509a5bda50db24dd6b423 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 13:33:24 +0100 Subject: [PATCH 47/57] feat: specify time zone of ColumnType.datetime --- src/safeds/_utils/_string.py | 10 +++++- src/safeds/_validation/__init__.py | 3 ++ .../_validation/_check_time_zone_module.py | 34 +++++++++++++++++++ ...onvert_and_check_datetime_format_module.py | 9 +++-- .../data/tabular/typing/_column_type.py | 19 +++++++++-- .../_polars_column_type/test_datetime.py | 8 +++++ 6 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 src/safeds/_validation/_check_time_zone_module.py create mode 100644 tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py diff --git a/src/safeds/_utils/_string.py b/src/safeds/_utils/_string.py index 40bf86f6b..6aadadce8 100644 --- a/src/safeds/_utils/_string.py 
+++ b/src/safeds/_utils/_string.py @@ -1,4 +1,12 @@ -def _get_similar_strings(string: str, valid_strings: list[str]) -> list[str]: +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterable + + +def _get_similar_strings(string: str, valid_strings: Iterable[str]) -> list[str]: from difflib import get_close_matches close_matches = get_close_matches(string, valid_strings, n=3) diff --git a/src/safeds/_validation/__init__.py b/src/safeds/_validation/__init__.py index ecf75ca06..004386c24 100644 --- a/src/safeds/_validation/__init__.py +++ b/src/safeds/_validation/__init__.py @@ -13,6 +13,7 @@ from ._check_indices_module import _check_indices from ._check_row_counts_are_equal_module import _check_row_counts_are_equal from ._check_schema_module import _check_schema + from ._check_time_zone_module import _check_time_zone from ._convert_and_check_datetime_format_module import _convert_and_check_datetime_format from ._normalize_and_check_file_path_module import _normalize_and_check_file_path @@ -30,6 +31,7 @@ "_check_indices": "._check_indices_module:_check_indices", "_check_row_counts_are_equal": "._check_row_counts_are_equal_module:_check_row_counts_are_equal", "_check_schema": "._check_schema_module:_check_schema", + "_check_time_zone": "._check_time_zone_module:_check_time_zone", "_convert_and_check_datetime_format": "._convert_and_check_datetime_format_module:_convert_and_check_datetime_format", "_normalize_and_check_file_path": "._normalize_and_check_file_path_module:_normalize_and_check_file_path", }, @@ -47,6 +49,7 @@ "_check_indices", "_check_row_counts_are_equal", "_check_schema", + "_check_time_zone", "_convert_and_check_datetime_format", "_normalize_and_check_file_path", ] diff --git a/src/safeds/_validation/_check_time_zone_module.py b/src/safeds/_validation/_check_time_zone_module.py new file mode 100644 index 000000000..682b742f9 --- /dev/null +++ 
b/src/safeds/_validation/_check_time_zone_module.py @@ -0,0 +1,34 @@ +import zoneinfo + +from safeds._utils import _get_similar_strings + +_VALID_TZ_IDENTIFIERS = zoneinfo.available_timezones() + + +def _check_time_zone(time_zone: str | None): + """ + Check if the time zone is valid. + + Parameters + ---------- + time_zone: + The time zone to check. + + Raises + ------ + ValueError + If the time zone is invalid. + """ + if time_zone is not None and time_zone not in _VALID_TZ_IDENTIFIERS: + message = _build_error_message(time_zone) + raise ValueError(message) + + +def _build_error_message(time_zone: str): + result = f"Invalid time zone '{time_zone}'." + + similar_time_zones = _get_similar_strings(time_zone, _VALID_TZ_IDENTIFIERS) + if similar_time_zones: + result += f" Did you mean one of {similar_time_zones}?" + + return result diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index 002743737..0da39f9ba 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -1,9 +1,12 @@ from __future__ import annotations -from typing import Literal +from typing import TYPE_CHECKING, Literal from safeds._utils import _get_similar_strings +if TYPE_CHECKING: + from collections.abc import Iterable + _DATE_REPLACEMENTS = { # Year "Y": "Y", @@ -175,14 +178,14 @@ def _convert_and_check_template_expression( return "%" + replacements[expression] # Unknown specifier - message = _build_error_message(expression, type_, list(replacements.keys())) + message = _build_error_message(expression, type_, replacements.keys()) raise ValueError(message) def _build_error_message( expression: str, type_: str, - valid_expressions: list[str], + valid_expressions: Iterable[str], ) -> str: result = f"Invalid specifier '{expression}' for type {type_}." 
diff --git a/src/safeds/data/tabular/typing/_column_type.py b/src/safeds/data/tabular/typing/_column_type.py index 532a3448e..7409e6af7 100644 --- a/src/safeds/data/tabular/typing/_column_type.py +++ b/src/safeds/data/tabular/typing/_column_type.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +from safeds._validation import _check_time_zone + if TYPE_CHECKING: import polars as pl @@ -126,13 +128,24 @@ def date() -> ColumnType: return _PolarsColumnType(pl.Date()) @staticmethod - def datetime() -> ColumnType: - """Create a `datetime` column type.""" + def datetime(*, time_zone: str | None = None) -> ColumnType: + """ + Create a `datetime` column type. + + Parameters + ---------- + time_zone: + The time zone. If None, values are assumed to be in local time. This is different from setting the time zone + to `"UTC"`. Any TZ identifier defined in the + [tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is valid. + """ import polars as pl from ._polars_column_type import _PolarsColumnType # circular import - return _PolarsColumnType(pl.Datetime()) + _check_time_zone(time_zone) + + return _PolarsColumnType(pl.Datetime(time_zone=time_zone)) @staticmethod def duration() -> ColumnType: diff --git a/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py new file mode 100644 index 000000000..8ab676e21 --- /dev/null +++ b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py @@ -0,0 +1,8 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType + + +def test_should_raise_if_time_zone_is_invalid(): + with pytest.raises(ValueError, match="Invalid time zone"): + ColumnType.datetime(time_zone="invalid") From 143bd18af9a7ec27ac2b741eb02b9a86fefd6fc1 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 13:43:34 +0100 Subject: [PATCH 48/57] feat: specify time zone for `Cell.datetime` --- 
src/safeds/data/tabular/containers/_cell.py | 22 ++++++- .../containers/_lazy_cell/test_datetime.py | 57 ++++++++++++------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index b71da80de..202af7661 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from safeds._validation import _check_time_zone + if TYPE_CHECKING: import datetime as python_datetime @@ -150,6 +152,7 @@ def datetime( minute: _ConvertibleToIntCell = 0, second: _ConvertibleToIntCell = 0, microsecond: _ConvertibleToIntCell = 0, + time_zone: str | None = None, ) -> Cell[python_datetime.datetime | None]: """ Create a cell with a datetime. @@ -172,6 +175,10 @@ def datetime( The second. Must be between 0 and 59. microsecond: The microsecond. Must be between 0 and 999,999. + time_zone: + The time zone. If None, values are assumed to be in local time. This is different from setting the time zone + to `"UTC"`. Any TZ identifier defined in the + [tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is valid. 
Returns ------- @@ -208,6 +215,8 @@ def datetime( from ._lazy_cell import _LazyCell # circular import + _check_time_zone(time_zone) + pl_year = _to_polars_expression(year) pl_month = _to_polars_expression(month) pl_day = _to_polars_expression(day) @@ -219,7 +228,18 @@ def datetime( # By default, microseconds overflow into seconds return _LazyCell( pl.when(pl_microsecond <= 999_999) - .then(pl.datetime(pl_year, pl_month, pl_day, pl_hour, pl_minute, pl_second, pl_microsecond)) + .then( + pl.datetime( + pl_year, + pl_month, + pl_day, + pl_hour, + pl_minute, + pl_second, + pl_microsecond, + time_zone=time_zone, + ), + ) .otherwise(None), ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py index dcab1d935..82aa6a072 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py @@ -1,14 +1,14 @@ -from datetime import datetime +from datetime import UTC, datetime import pytest from safeds._typing import _ConvertibleToIntCell -from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers import Cell, Column from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("year", "month", "day", "hour", "minute", "second", "microsecond", "expected"), + ("year", "month", "day", "hour", "minute", "second", "microsecond", "time_zone", "expected"), [ ( 1, @@ -18,6 +18,7 @@ 5, 6, 7, + None, datetime(1, 2, 3, 4, 5, 6, 7), # noqa: DTZ001 ), ( @@ -28,34 +29,37 @@ Cell.constant(5), Cell.constant(6), Cell.constant(7), + None, datetime(1, 2, 3, 4, 5, 6, 7), # noqa: DTZ001 ), # invalid year - (None, 2, 3, 4, 5, 6, 7, None), + (None, 2, 3, 4, 5, 6, 7, None, None), # invalid month - (1, None, 3, 4, 5, 6, 7, None), - (1, 0, 3, 4, 5, 6, 7, None), - (1, 13, 3, 4, 5, 6, 7, None), + (1, None, 3, 4, 5, 6, 7, None, None), + (1, 0, 3, 4, 5, 6, 7, None, None), + (1, 13, 3, 4, 5, 
6, 7, None, None), # invalid day - (1, 2, None, 4, 5, 6, 7, None), - (1, 2, 0, 4, 5, 6, 7, None), - (1, 2, 32, 4, 5, 6, 7, None), + (1, 2, None, 4, 5, 6, 7, None, None), + (1, 2, 0, 4, 5, 6, 7, None, None), + (1, 2, 32, 4, 5, 6, 7, None, None), # invalid hour - (1, 2, 3, None, 5, 6, 7, None), - (1, 2, 3, -1, 5, 6, 7, None), - (1, 2, 3, 24, 5, 6, 7, None), + (1, 2, 3, None, 5, 6, 7, None, None), + (1, 2, 3, -1, 5, 6, 7, None, None), + (1, 2, 3, 24, 5, 6, 7, None, None), # invalid minute - (1, 2, 3, 4, None, 6, 7, None), - (1, 2, 3, 4, -1, 6, 7, None), - (1, 2, 3, 4, 60, 6, 7, None), + (1, 2, 3, 4, None, 6, 7, None, None), + (1, 2, 3, 4, -1, 6, 7, None, None), + (1, 2, 3, 4, 60, 6, 7, None, None), # invalid second - (1, 2, 3, 4, 5, None, 7, None), - (1, 2, 3, 4, 5, -1, 7, None), - (1, 2, 3, 4, 5, 60, 7, None), + (1, 2, 3, 4, 5, None, 7, None, None), + (1, 2, 3, 4, 5, -1, 7, None, None), + (1, 2, 3, 4, 5, 60, 7, None, None), # invalid microsecond - (1, 2, 3, 4, 5, 6, None, None), - (1, 2, 3, 4, 5, 6, -1, None), - (1, 2, 3, 4, 5, 6, 1_000_000, None), + (1, 2, 3, 4, 5, 6, None, None, None), + (1, 2, 3, 4, 5, 6, -1, None, None), + (1, 2, 3, 4, 5, 6, 1_000_000, None, None), + # with time zone + (1, 2, 3, 4, 5, 6, 7, "UTC", datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC)), ], ids=[ "int components", @@ -79,6 +83,7 @@ "microsecond is None", "microsecond is too low", "microsecond is too high", + "with time zone", ], ) def test_should_return_datetime( @@ -89,6 +94,7 @@ def test_should_return_datetime( minute: _ConvertibleToIntCell, second: _ConvertibleToIntCell, microsecond: _ConvertibleToIntCell, + time_zone: str | None, expected: datetime, ) -> None: assert_cell_operation_works( @@ -101,6 +107,13 @@ def test_should_return_datetime( minute=minute, second=second, microsecond=microsecond, + time_zone=time_zone, ), expected, ) + + +def test_should_raise_if_time_zone_is_invalid(): + column = Column("a", [None]) + with pytest.raises(ValueError, match="Invalid time zone"): + 
column.transform(lambda _: Cell.datetime(1, 2, 3, time_zone="invalid")) From bfcafd16c5e9fa46ffa7656034d4d1d72bc1838d Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 14:49:36 +0100 Subject: [PATCH 49/57] refactor: store LazyFrame in Column --- src/safeds/data/tabular/containers/_column.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 85ac6d44e..03d24acbe 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -3,7 +3,7 @@ from collections.abc import Callable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeVar, overload -from safeds._utils import _structural_hash +from safeds._utils import _safe_collect_lazy_frame, _structural_hash from safeds._validation import ( _check_column_has_no_missing_values, _check_column_is_numeric, @@ -16,7 +16,7 @@ from ._lazy_cell import _LazyCell if TYPE_CHECKING: - from polars import Series + import polars as pl from safeds.data.tabular.typing import ColumnType from safeds.exceptions import ( # noqa: F401 @@ -79,9 +79,10 @@ class Column(Sequence[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def _from_polars_series(data: Series) -> Column: + def _from_polars_series(data: pl.Series) -> Column: result = object.__new__(Column) - result._series = data + result._lazy_frame = data.to_frame().lazy() + result.__series_cache = data return result # ------------------------------------------------------------------------------------------------------------------ @@ -101,7 +102,8 @@ def __init__( dtype = None if type is None else type._polars_data_type # Implementation - self._series: pl.Series = pl.Series(name, data, dtype=dtype, strict=False) + self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data, schema={name: dtype}, 
strict=False) + self.__series_cache: pl.Series | None = None def __contains__(self, value: object) -> bool: import polars as pl @@ -157,6 +159,13 @@ def __str__(self) -> str: # Properties # ------------------------------------------------------------------------------------------------------------------ + @property + def _series(self) -> pl.Series: + if self.__series_cache is None: + self.__series_cache = _safe_collect_lazy_frame(self._lazy_frame).to_series(0) + + return self.__series_cache + @property def name(self) -> str: """ From 6786c2b2f618aa58e80d9390148995b4a58976c0 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 15:07:21 +0100 Subject: [PATCH 50/57] refactor: create Column from a LazyFrame --- src/safeds/data/tabular/containers/_column.py | 17 ++++++++++--- src/safeds/data/tabular/containers/_table.py | 2 ++ .../_column/test_from_lazy_frame.py | 24 +++++++++++++++++++ .../_column/test_from_polars_series.py | 2 +- 4 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 03d24acbe..84635bc6c 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -3,7 +3,7 @@ from collections.abc import Callable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeVar, overload -from safeds._utils import _safe_collect_lazy_frame, _structural_hash +from safeds._utils import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema, _structural_hash from safeds._validation import ( _check_column_has_no_missing_values, _check_column_is_numeric, @@ -78,6 +78,13 @@ class Column(Sequence[T_co]): # Import # ------------------------------------------------------------------------------------------------------------------ + @staticmethod + def _from_polars_lazy_frame(data: pl.LazyFrame, name: str) -> 
Column: + result = object.__new__(Column) + result._lazy_frame = data.select(name) + result.__series_cache = None + return result + @staticmethod def _from_polars_series(data: pl.Series) -> Column: result = object.__new__(Column) @@ -178,13 +185,16 @@ def name(self) -> str: >>> column.name 'a' """ - return self._series.name + schema = _safe_collect_lazy_frame_schema(self._lazy_frame) + return schema.names()[0] @property def row_count(self) -> int: """ The number of rows. + **Note:** This operation must fully load the data into memory, which can be expensive. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -221,7 +231,8 @@ def type(self) -> ColumnType: >>> column.type int64 """ - return _PolarsColumnType(self._series.dtype) + schema = _safe_collect_lazy_frame_schema(self._lazy_frame) + return _PolarsColumnType(schema.dtypes()[0]) # ------------------------------------------------------------------------------------------------------------------ # Value operations diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 89d7d7dcf..6271b3cec 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -463,6 +463,8 @@ def schema(self) -> Schema: """ The schema of the table, which is a mapping from column names to their types. + **Note:** This operation must compute the schema of the table, which can be expensive. 
+ Examples -------- >>> from safeds.data.tabular.containers import Table diff --git a/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py new file mode 100644 index 000000000..0fe6a2f88 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py @@ -0,0 +1,24 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Column + + +def test_should_store_the_name() -> None: + frame = pl.LazyFrame({"col1": []}) + assert Column._from_polars_lazy_frame(frame, "col1").name == "col1" + + +@pytest.mark.parametrize( + ("frame", "expected"), + [ + (pl.LazyFrame({"col1": []}), []), + (pl.LazyFrame({"col1": [True]}), [True]), + ], + ids=[ + "empty", + "non-empty", + ], +) +def test_should_store_the_data(frame: pl.LazyFrame, expected: list) -> None: + assert list(Column._from_polars_lazy_frame(frame, "col1")) == expected diff --git a/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py b/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py index 8a5348f15..59391852c 100644 --- a/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py +++ b/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py @@ -20,5 +20,5 @@ def test_should_store_the_name() -> None: "non-empty", ], ) -def test_should_store_the_data(series: pl.Series, expected: Column) -> None: +def test_should_store_the_data(series: pl.Series, expected: list) -> None: assert list(Column._from_polars_series(series)) == expected From e8a5f9e0646cccabeb85a8b5b45a19e3372ecf6d Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 15:49:35 +0100 Subject: [PATCH 51/57] perf: make some operations lazy --- src/safeds/data/tabular/containers/_column.py | 53 ++++++++------ src/safeds/data/tabular/containers/_table.py | 72 +++++++++---------- .../_column/test_from_lazy_frame.py | 4 +- 3 files changed, 68 
insertions(+), 61 deletions(-) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 84635bc6c..6c7b790fa 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -79,8 +79,9 @@ class Column(Sequence[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def _from_polars_lazy_frame(data: pl.LazyFrame, name: str) -> Column: + def _from_polars_lazy_frame(name: str, data: pl.LazyFrame) -> Column: result = object.__new__(Column) + result._name = name result._lazy_frame = data.select(name) result.__series_cache = None return result @@ -88,6 +89,7 @@ def _from_polars_lazy_frame(data: pl.LazyFrame, name: str) -> Column: @staticmethod def _from_polars_series(data: pl.Series) -> Column: result = object.__new__(Column) + result._name = data.name result._lazy_frame = data.to_frame().lazy() result.__series_cache = data return result @@ -109,6 +111,7 @@ def __init__( dtype = None if type is None else type._polars_data_type # Implementation + self._name: str = name self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data, schema={name: dtype}, strict=False) self.__series_cache: pl.Series | None = None @@ -137,8 +140,11 @@ def __getitem__(self, index: slice) -> Column[T_co]: ... 
def __getitem__(self, index: int | slice) -> T_co | Column[T_co]: if isinstance(index, int): return self.get_value(index) - else: - return self._from_polars_series(self._series.__getitem__(index)) + + try: + return self._from_polars_lazy_frame(self.name, self._lazy_frame[index]) + except ValueError: + return self._from_polars_series(self._series[index]) def __hash__(self) -> int: return _structural_hash( @@ -169,7 +175,7 @@ def __str__(self) -> str: @property def _series(self) -> pl.Series: if self.__series_cache is None: - self.__series_cache = _safe_collect_lazy_frame(self._lazy_frame).to_series(0) + self.__series_cache = _safe_collect_lazy_frame(self._lazy_frame).to_series() return self.__series_cache @@ -185,8 +191,7 @@ def name(self) -> str: >>> column.name 'a' """ - schema = _safe_collect_lazy_frame_schema(self._lazy_frame) - return schema.names()[0] + return self._name @property def row_count(self) -> int: @@ -335,7 +340,8 @@ def get_value(self, index: int) -> T_co: """ _check_indices(self, index) - return self._series.__getitem__(index) + # Lazy containers do not allow indexed accesses + return self._series[index] # ------------------------------------------------------------------------------------------------------------------ # Reductions @@ -413,9 +419,10 @@ def all( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.all(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() @overload def any( @@ -489,9 +496,10 @@ def any( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.any(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() 
+ frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() @overload def count_if( @@ -554,11 +562,11 @@ def count_if( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression - series = self._series.to_frame().select(expression.alias(self.name)).get_column(self.name) + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + series = frame.to_series() - if ignore_unknown or series.null_count() == 0: + if ignore_unknown or not series.has_nulls(): return series.sum() else: return None @@ -635,9 +643,10 @@ def none( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.not_().all(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() # ------------------------------------------------------------------------------------------------------------------ # Transformations @@ -674,7 +683,8 @@ def rename(self, new_name: str) -> Column[T_co]: | 3 | +-----+ """ - return self._from_polars_series(self._series.rename(new_name)) + result = self._lazy_frame.rename({self.name: new_name}) + return self._from_polars_lazy_frame(new_name, result) def transform( self, @@ -712,11 +722,10 @@ def transform( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first - expression = transformer(_LazyCell(pl.col(self.name)))._polars_expression - series = self._series.to_frame().with_columns(expression.alias(self.name)).get_column(self.name) + expression = transformer(_LazyCell(pl.col(self.name)))._polars_expression.alias(self.name) + result = self._lazy_frame.with_columns(expression) # with_columns always 
keeps number of rows - return self._from_polars_series(series) + return self._from_polars_lazy_frame(self.name, result) # ------------------------------------------------------------------------------------------------------------------ # Statistics @@ -1241,7 +1250,7 @@ def to_table(self) -> Table: """ from ._table import Table - return Table._from_polars_data_frame(self._series.to_frame()) + return Table._from_polars_lazy_frame(self._lazy_frame) # ------------------------------------------------------------------------------------------------------------------ # IPython integration diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 6271b3cec..d56c10b39 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -139,22 +139,21 @@ def from_columns(columns: Column | list[Column]) -> Table: +-----+-----+ """ import polars as pl - from polars.exceptions import DuplicateError, ShapeError if isinstance(columns, Column): columns = [columns] + if len(columns) == 0: + return Table({}) - try: - return Table._from_polars_lazy_frame( - pl.LazyFrame([column._series for column in columns]), - ) - # polars already validates this, so we don't do it upfront (performance) - except DuplicateError: - _check_columns_dont_exist(Table({}), [column.name for column in columns]) - return Table({}) # pragma: no cover - except ShapeError: - _check_row_counts_are_equal(columns) - return Table({}) # pragma: no cover + _check_columns_dont_exist(Table({}), [column.name for column in columns]) + _check_row_counts_are_equal(columns) + + return Table._from_polars_lazy_frame( + pl.concat( + [column._lazy_frame for column in columns], + how="horizontal", + ), + ) @staticmethod def from_csv_file(path: str | Path, *, separator: str = ",") -> Table: @@ -490,10 +489,7 @@ def add_columns( """ Add columns to the table and return the result as a new table. 
- **Notes:** - - - The original table is not modified. - - This operation must fully load the data into memory, which can be expensive. + **Note:** The original table is not modified. Parameters ---------- @@ -534,7 +530,7 @@ def add_columns( Add a column with values computed from other columns. - [`add_index_column`][safeds.data.tabular.containers._table.Table.add_index_column] """ - from polars.exceptions import DuplicateError, ShapeError + import polars as pl if isinstance(columns, Table): return self.add_tables_as_columns(columns) @@ -544,17 +540,18 @@ def add_columns( if len(columns) == 0: return self - try: - return Table._from_polars_data_frame( - self._data_frame.hstack([column._series for column in columns]), - ) - # polars already validates this, so we don't do it upfront (performance) - except DuplicateError: - _check_columns_dont_exist(self, [column.name for column in columns]) - return Table({}) # pragma: no cover - except ShapeError: - _check_row_counts_are_equal([self, *columns]) - return Table({}) # pragma: no cover + _check_columns_dont_exist(self, [column.name for column in columns]) + _check_row_counts_are_equal([self, *columns], ignore_entries_without_rows=True) + + return Table._from_polars_lazy_frame( + pl.concat( + [ + self._lazy_frame, + *[column._lazy_frame for column in columns], + ], + how="horizontal", + ), + ) def add_computed_column( self, @@ -722,9 +719,7 @@ def get_column(self, name: str) -> Column: +-----+ """ _check_columns_exist(self, name) - return Column._from_polars_series( - _safe_collect_lazy_frame(self._lazy_frame.select(name)).get_column(name), - ) + return Column._from_polars_lazy_frame(name, self._lazy_frame) def get_column_type(self, name: str) -> ColumnType: """ @@ -1088,6 +1083,8 @@ def replace_column( | 9 | 12 | 6 | +-----+-----+-----+ """ + import polars.selectors as cs + if isinstance(new_columns, Column): new_columns = [new_columns] elif isinstance(new_columns, Table): @@ -1106,15 +1103,14 @@ def replace_column( 
self._lazy_frame.with_columns(new_column._series.alias(old_name)).rename({old_name: new_column.name}), ) - import polars as pl - - index = self.column_names.index(old_name) + column_names = self.column_names + index = column_names.index(old_name) return Table._from_polars_lazy_frame( self._lazy_frame.select( - *[pl.col(name) for name in self.column_names[:index]], + cs.by_name(column_names[:index]), *[column._series for column in new_columns], - *[pl.col(name) for name in self.column_names[index + 1 :]], + cs.by_name(column_names[index + 1 :]), ), ) @@ -2550,7 +2546,7 @@ def to_columns(self) -> list[Column]: >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> columns = table.to_columns() """ - return [Column._from_polars_series(column) for column in self._data_frame.get_columns()] + return [Column._from_polars_lazy_frame(name, self._lazy_frame) for name in self.column_names] def to_csv_file(self, path: str | Path) -> None: """ @@ -2589,6 +2585,8 @@ def to_dict(self) -> dict[str, list[Any]]: """ Return a dictionary that maps column names to column values. + **Note:** This operation must fully load the data into memory, which can be expensive. 
+ Returns ------- dict: diff --git a/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py index 0fe6a2f88..9791c3e08 100644 --- a/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py +++ b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py @@ -6,7 +6,7 @@ def test_should_store_the_name() -> None: frame = pl.LazyFrame({"col1": []}) - assert Column._from_polars_lazy_frame(frame, "col1").name == "col1" + assert Column._from_polars_lazy_frame("col1", frame).name == "col1" @pytest.mark.parametrize( @@ -21,4 +21,4 @@ def test_should_store_the_name() -> None: ], ) def test_should_store_the_data(frame: pl.LazyFrame, expected: list) -> None: - assert list(Column._from_polars_lazy_frame(frame, "col1")) == expected + assert list(Column._from_polars_lazy_frame("col1", frame)) == expected From a7a4ad1d593bc35047baf2119527d5a2abd43eaf Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 16:43:51 +0100 Subject: [PATCH 52/57] test: `dt.to_string` --- .../test_to_string.py | 210 +++++++++++++++--- 1 file changed, 173 insertions(+), 37 deletions(-) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index f4d7e3ce3..f8e0b7ea9 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -2,10 +2,10 @@ import pytest from helpers import assert_cell_operation_works -from polars.polars import ComputeError, PanicException from safeds.data.tabular.containers import Column from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import LazyComputationError DATETIME = datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) DATE = date(1, 2, 3) @@ -13,74 +13,205 @@ @pytest.mark.parametrize( - ("value", "format_", "expected"), + 
("value", "expected"), [ - (datetime(1, 2, 3, 4, 5, 6, 7), "iso", "0001-02-03T04:05:06.000007"), # noqa: DTZ001 - (DATETIME, "iso", "0001-02-03T04:05:06.000007+00:00"), - (None, "iso", None), + (datetime(1, 2, 3, 4, 5, 6, 7), "0001-02-03T04:05:06.000007"), # noqa: DTZ001 + (DATETIME, "0001-02-03T04:05:06.000007+00:00"), + (DATE, "0001-02-03"), + (TIME, "04:05:06.000007"), + (None, None), ], ids=[ - "iso, no time zone", - "iso, with time zone", + "datetime without time zone", + "datetime with time zone", + "date", + "time", "None", ], ) -def test_should_stringify_datetime(value: datetime | None, format_: str, expected: str | None): +def test_should_handle_iso_8601(value: datetime | date | time | None, expected: str | None): assert_cell_operation_works( value, - lambda cell: cell.dt.to_string(format=format_), + lambda cell: cell.dt.to_string(format="iso"), expected, type_if_none=ColumnType.datetime(), ) @pytest.mark.parametrize( - ("value", "format_", "expected"), + "value", [ - (DATE, "iso", "0001-02-03"), - (None, "iso", None), + DATETIME, + DATE, ], ids=[ - "iso", - "None", + "datetime", + "date", ], ) -def test_should_stringify_date(value: date | None, format_: str, expected: str | None): - assert_cell_operation_works( - value, - lambda cell: cell.dt.to_string(format=format_), - expected, - type_if_none=ColumnType.date(), +class TestDateSpecifiers: + @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{Y}", "0001"), + ("{_Y}", " 1"), + ("{^Y}", "1"), + ("{Y99}", "01"), + ("{_Y99}", " 1"), + ("{^Y99}", "1"), + ("{M}", "02"), + ("{_M}", " 2"), + ("{^M}", "2"), + ("{M-full}", "February"), + ("{M-short}", "Feb"), + ("{W}", "05"), + ("{_W}", " 5"), + ("{^W}", "5"), + ("{D}", "03"), + ("{_D}", " 3"), + ("{^D}", "3"), + ("{DOW}", "6"), + ("{DOW-full}", "Saturday"), + ("{DOW-short}", "Sat"), + ("{DOY}", "034"), + ("{_DOY}", " 34"), + ("{^DOY}", "34"), + ], + ids=[ + "{Y}", + "{_Y}", + "{^Y}", + "{Y99}", + "{_Y99}", + "{^Y99}", + "{M}", + "{_M}", + "{^M}", + 
"{M-full}", + "{M-short}", + "{W}", + "{_W}", + "{^W}", + "{D}", + "{_D}", + "{^D}", + "{DOW}", + "{DOW-full}", + "{DOW-short}", + "{DOY}", + "{_DOY}", + "{^DOY}", + ], ) + def test_should_be_replaced_with_correct_string(self, value: datetime | date, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) @pytest.mark.parametrize( - ("value", "format_", "expected"), + "value", [ - (TIME, "iso", "04:05:06.000007"), - (None, "iso", None), + DATETIME, + TIME, ], ids=[ - "iso", - "None", + "datetime", + "time", ], ) -def test_should_stringify_time(value: time | None, format_: str, expected: str | None): - assert_cell_operation_works( - value, - lambda cell: cell.dt.to_string(format=format_), - expected, - type_if_none=ColumnType.time(), +class TestTimeSpecifiers: + @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{h}", "04"), + ("{_h}", " 4"), + ("{^h}", "4"), + ("{h12}", "04"), + ("{_h12}", " 4"), + ("{^h12}", "4"), + ("{m}", "05"), + ("{_m}", " 5"), + ("{^m}", "5"), + ("{s}", "06"), + ("{_s}", " 6"), + ("{^s}", "6"), + ("{.f}", ".000007"), + ("{ms}", "000"), + ("{us}", "000007"), + ("{ns}", "000007000"), + ("{AM/PM}", "AM"), + ("{am/pm}", "am"), + ], + ids=[ + "{h}", + "{_h}", + "{^h}", + "{h12}", + "{_h12}", + "{^h12}", + "{m}", + "{_m}", + "{^m}", + "{s}", + "{_s}", + "{^s}", + "{.f}", + "{ms}", + "{us}", + "{ns}", + "{AM/PM}", + "{am/pm}", + ], + ) + def test_should_be_replaced_with_correct_string(self, value: datetime | time, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + "value", + [ + DATETIME, + ], + ids=[ + "datetime", + ], +) +class TestDateTimeSpecifiers: + @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{z}", "+0000"), + ("{:z}", "+00:00"), + ("{u}", "-62132730894"), + ], + ids=[ + "{z}", + "{:z}", + "{u}", + ], ) + 
def test_should_be_replaced_with_correct_string(self, value: datetime, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) -def test_should_raise_for_unclosed_specifier(): +def test_should_raise_for_unclosed_specifier() -> None: column = Column("a", [DATETIME]) with pytest.raises(ValueError, match="Unclosed specifier"): column.transform(lambda cell: cell.dt.to_string(format="{Y")) -def test_should_raise_for_globally_invalid_specifier(): +def test_should_raise_for_globally_invalid_specifier() -> None: column = Column("a", [DATETIME]) with pytest.raises(ValueError, match="Invalid specifier"): column.transform(lambda cell: cell.dt.to_string(format="{invalid}")) @@ -90,16 +221,21 @@ def test_should_raise_for_globally_invalid_specifier(): ("value", "format_"), [ (DATE, "{h}"), - (TIME, "{Y}"), + pytest.param( + TIME, + "{Y}", + marks=pytest.mark.skip("polars panics in this case (https://github.com/pola-rs/polars/issues/19853)."), + ), ], ids=[ "invalid for date", "invalid for time", ], ) -def test_should_raise_for_specifier_that_is_invalid_for_type(value: date | time | None, format_: str): +def test_should_raise_for_specifier_that_is_invalid_for_type(value: date | time | None, format_: str) -> None: # TODO: This is not the ideal behavior. Once https://github.com/Safe-DS/Library/issues/860 is resolved, we should # do our own validation to raise an error that knows our own specifiers. 
column = Column("a", [value]) - with pytest.raises((ComputeError, PanicException)): - column.transform(lambda cell: cell.dt.to_string(format=format_)) + lazy_result = column.transform(lambda cell: cell.dt.to_string(format=format_)) + with pytest.raises(LazyComputationError): + lazy_result.get_value(0) From 47930e16f7eeaac369393609b8ceb29981c7f981 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 16:45:45 +0100 Subject: [PATCH 53/57] style: fix mypy errors --- src/safeds/_validation/_check_time_zone_module.py | 4 ++-- .../_validation/_convert_and_check_datetime_format_module.py | 2 +- .../data/tabular/containers/_lazy_cell/test_datetime.py | 2 +- .../tabular/query/_lazy_datetime_operations/test_to_string.py | 2 +- .../data/tabular/typing/_polars_column_type/test_datetime.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/safeds/_validation/_check_time_zone_module.py b/src/safeds/_validation/_check_time_zone_module.py index 682b742f9..a05126c73 100644 --- a/src/safeds/_validation/_check_time_zone_module.py +++ b/src/safeds/_validation/_check_time_zone_module.py @@ -5,7 +5,7 @@ _VALID_TZ_IDENTIFIERS = zoneinfo.available_timezones() -def _check_time_zone(time_zone: str | None): +def _check_time_zone(time_zone: str | None) -> None: """ Check if the time zone is valid. @@ -24,7 +24,7 @@ def _check_time_zone(time_zone: str | None): raise ValueError(message) -def _build_error_message(time_zone: str): +def _build_error_message(time_zone: str) -> str: result = f"Invalid time zone '{time_zone}'." 
similar_time_zones = _get_similar_strings(time_zone, _VALID_TZ_IDENTIFIERS) diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index 0da39f9ba..dc6abaa62 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -114,7 +114,7 @@ def _convert_and_check_datetime_format( index = 0 while index < len(format_): - char = char_at(format_, index) + char = format_[index] # Escaped characters if char == "\\" and char_at(format_, index + 1) == "\\": diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py index 82aa6a072..fd2586f9a 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py @@ -113,7 +113,7 @@ def test_should_return_datetime( ) -def test_should_raise_if_time_zone_is_invalid(): +def test_should_raise_if_time_zone_is_invalid() -> None: column = Column("a", [None]) with pytest.raises(ValueError, match="Invalid time zone"): column.transform(lambda _: Cell.datetime(1, 2, 3, time_zone="invalid")) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index f8e0b7ea9..385bf3e84 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -29,7 +29,7 @@ "None", ], ) -def test_should_handle_iso_8601(value: datetime | date | time | None, expected: str | None): +def test_should_handle_iso_8601(value: datetime | date | time | None, expected: str | None) -> None: assert_cell_operation_works( value, lambda cell: cell.dt.to_string(format="iso"), diff --git 
a/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py index 8ab676e21..002a19df9 100644 --- a/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py +++ b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py @@ -3,6 +3,6 @@ from safeds.data.tabular.typing import ColumnType -def test_should_raise_if_time_zone_is_invalid(): +def test_should_raise_if_time_zone_is_invalid() -> None: with pytest.raises(ValueError, match="Invalid time zone"): ColumnType.datetime(time_zone="invalid") From b9acd7fb695f866fe70d639a533d0a4a4455ecc1 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 16:46:47 +0100 Subject: [PATCH 54/57] fix: wrong import --- .../tabular/query/_lazy_datetime_operations/test_to_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index 385bf3e84..54cbe1bb0 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -1,11 +1,11 @@ from datetime import UTC, date, datetime, time import pytest -from helpers import assert_cell_operation_works from safeds.data.tabular.containers import Column from safeds.data.tabular.typing import ColumnType from safeds.exceptions import LazyComputationError +from tests.helpers import assert_cell_operation_works DATETIME = datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) DATE = date(1, 2, 3) From bd9a0b27f4fce41534e1ba6e1333349320813ac4 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 17:11:59 +0100 Subject: [PATCH 55/57] fix: failing tests --- src/safeds/data/tabular/containers/_column.py | 4 ++-- .../data/tabular/query/_lazy_string_operations.py | 11 +++++++---- 
src/safeds/data/tabular/query/_string_operations.py | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 6c7b790fa..85edd4ba5 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -112,8 +112,8 @@ def __init__( # Implementation self._name: str = name - self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data, schema={name: dtype}, strict=False) - self.__series_cache: pl.Series | None = None + self.__series_cache: pl.Series | None = pl.Series(name, data, dtype=dtype, strict=False) + self._lazy_frame: pl.LazyFrame = self.__series_cache.to_frame().lazy() def __contains__(self, value: object) -> bool: import polars as pl diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index cb9a2790a..c5750d48c 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _convert_and_check_datetime_format +from safeds._validation import _convert_and_check_datetime_format, _check_bounds, _ClosedBound from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations @@ -75,9 +75,12 @@ def substring( start: _ConvertibleToIntCell = 0, length: _ConvertibleToIntCell = None, ) -> Cell[str | None]: + if isinstance(length, int): + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + return _LazyCell(self._expression.str.slice(start, length)) - def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: if format == "iso": format = "%F" # noqa: A001 elif format is not None: @@ -85,7 +88,7 @@ def 
to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: return _LazyCell(self._expression.str.to_date(format=format, strict=False)) - def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: if format == "iso": format = "%+" # noqa: A001 elif format is not None: @@ -99,7 +102,7 @@ def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: def to_lowercase(self) -> Cell[str | None]: return _LazyCell(self._expression.str.to_lowercase()) - def to_time(self, *, format: str | None = None) -> Cell[datetime.time | None]: + def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: if format == "iso": format = "%T" # noqa: A001 elif format is not None: diff --git a/src/safeds/data/tabular/query/_string_operations.py b/src/safeds/data/tabular/query/_string_operations.py index 0284d6029..6b99afe59 100644 --- a/src/safeds/data/tabular/query/_string_operations.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -303,7 +303,7 @@ def substring( # TODO: add format parameter + document @abstractmethod - def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: """ Convert the string value in the cell to a date. @@ -331,7 +331,7 @@ def to_date(self, *, format: str | None = None) -> Cell[datetime.date | None]: # TODO: add format parameter + document @abstractmethod - def to_datetime(self, *, format: str | None = None) -> Cell[datetime.datetime | None]: + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: """ Convert the string value in the cell to a datetime. 
From 209807ec8205cdef038e5aceb64180513cb668f0 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 19 Jan 2025 16:13:46 +0000 Subject: [PATCH 56/57] style: apply automated linter fixes --- src/safeds/data/tabular/query/_lazy_string_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py index c5750d48c..94cc4ac25 100644 --- a/src/safeds/data/tabular/query/_lazy_string_operations.py +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds._validation import _convert_and_check_datetime_format, _check_bounds, _ClosedBound +from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format from safeds.data.tabular.containers._lazy_cell import _LazyCell from ._string_operations import StringOperations From 9af75031aabe987d21d74d64ebd9b8703b938155 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 19 Jan 2025 17:30:59 +0100 Subject: [PATCH 57/57] test: add missing tests --- .../_validation/_check_time_zone_module.py | 2 +- ...onvert_and_check_datetime_format_module.py | 4 +-- .../test_to_string.py | 27 +++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/safeds/_validation/_check_time_zone_module.py b/src/safeds/_validation/_check_time_zone_module.py index a05126c73..6f134fd4b 100644 --- a/src/safeds/_validation/_check_time_zone_module.py +++ b/src/safeds/_validation/_check_time_zone_module.py @@ -28,7 +28,7 @@ def _build_error_message(time_zone: str) -> str: result = f"Invalid time zone '{time_zone}'." similar_time_zones = _get_similar_strings(time_zone, _VALID_TZ_IDENTIFIERS) - if similar_time_zones: + if similar_time_zones: # pragma: no cover result += f" Did you mean one of {similar_time_zones}?" 
return result diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py index dc6abaa62..91b40b191 100644 --- a/src/safeds/_validation/_convert_and_check_datetime_format_module.py +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -120,7 +120,7 @@ def _convert_and_check_datetime_format( if char == "\\" and char_at(format_, index + 1) == "\\": converted_format += "\\" index += 2 - if char == "\\" and char_at(format_, index + 1) == "{": + elif char == "\\" and char_at(format_, index + 1) == "{": converted_format += "{" index += 2 # Characters that need to be escaped for rust's chrono crate @@ -190,7 +190,7 @@ def _build_error_message( result = f"Invalid specifier '{expression}' for type {type_}." similar_expressions = _get_similar_strings(expression, valid_expressions) - if similar_expressions: + if similar_expressions: # pragma: no cover result += f" Did you mean one of {similar_expressions}?" 
return result diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py index 54cbe1bb0..a5041f376 100644 --- a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -205,6 +205,33 @@ def test_should_be_replaced_with_correct_string(self, value: datetime, format_: ) +@pytest.mark.parametrize( + ("format_", "expected"), + [ + ("\\", "\\"), + ("\\\\", "\\"), + ("\\{", "{"), + ("%", "%"), + ("\n", "\n"), + ("\t", "\t"), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(format_: str, expected: date | time | None) -> None: + assert_cell_operation_works( + DATETIME, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + def test_should_raise_for_unclosed_specifier() -> None: column = Column("a", [DATETIME]) with pytest.raises(ValueError, match="Unclosed specifier"):