diff --git a/docs/tutorials/data_processing.ipynb b/docs/tutorials/data_processing.ipynb index c73d6162a..38ea4ce66 100644 --- a/docs/tutorials/data_processing.ipynb +++ b/docs/tutorials/data_processing.ipynb @@ -688,64 +688,14 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2024-05-24T11:02:33.599165800Z", - "start_time": "2024-05-24T11:02:33.479893800Z" - }, - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (10, 12)
idnamesexagesiblings_spousesparents_childrentickettravel_classfarecabinport_embarkedsurvived
i64strstrf64i64i64stri64f64strstri64
0"Abbing, Mr. Anthony""male"0.52400800"C.A. 5547"37.55null"Southampton"0
1"Abbott, Master. Eugene Joseph""male"0.16075102"C.A. 2673"320.25null"Southampton"0
2"Abbott, Mr. Rossmore Edward""male"0.1983311"C.A. 2673"320.25null"Southampton"0
3"Abbott, Mrs. Stanton (Rosa Hun…"female"0.43632511"C.A. 2673"320.25null"Southampton"1
4"Abelseth, Miss. Karen Marie""female"0.1983300"348125"37.65null"Southampton"1
5"Abelseth, Mr. Olaus Jorgensen""male"0.31106400"348122"37.65"F G63""Southampton"1
6"Abelson, Mr. Samuel""male"0.37369510"P/PP 3381"224.0null"Cherbourg"0
7"Abelson, Mrs. Samuel (Hannah W…"female"0.34864310"P/PP 3381"224.0null"Cherbourg"1
8"Abrahamsson, Mr. Abraham Augus…"male"0.24843400"SOTON/O2 3101284"37.925null"Southampton"1
9"Abrahim, Mrs. Joseph (Sophie H…"female"0.22338200"2657"37.2292null"Cherbourg"1
" - ], - "text/plain": [ - "+-----+-----------------------+--------+---------+---+----------+-------+---------------+----------+\n", - "| id | name | sex | age | … | fare | cabin | port_embarked | survived |\n", - "| --- | --- | --- | --- | | --- | --- | --- | --- |\n", - "| i64 | str | str | f64 | | f64 | str | str | i64 |\n", - "+==================================================================================================+\n", - "| 0 | Abbing, Mr. Anthony | male | 0.52401 | … | 7.55000 | null | Southampton | 0 |\n", - "| 1 | Abbott, Master. | male | 0.16075 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Eugene Joseph | | | | | | | |\n", - "| 2 | Abbott, Mr. Rossmore | male | 0.19833 | … | 20.25000 | null | Southampton | 0 |\n", - "| | Edward | | | | | | | |\n", - "| 3 | Abbott, Mrs. Stanton | female | 0.43633 | … | 20.25000 | null | Southampton | 1 |\n", - "| | (Rosa Hun… | | | | | | | |\n", - "| 4 | Abelseth, Miss. Karen | female | 0.19833 | … | 7.65000 | null | Southampton | 1 |\n", - "| | Marie | | | | | | | |\n", - "| 5 | Abelseth, Mr. Olaus | male | 0.31106 | … | 7.65000 | F G63 | Southampton | 1 |\n", - "| | Jorgensen | | | | | | | |\n", - "| 6 | Abelson, Mr. Samuel | male | 0.37369 | … | 24.00000 | null | Cherbourg | 0 |\n", - "| 7 | Abelson, Mrs. Samuel | female | 0.34864 | … | 24.00000 | null | Cherbourg | 1 |\n", - "| | (Hannah W… | | | | | | | |\n", - "| 8 | Abrahamsson, Mr. | male | 0.24843 | … | 7.92500 | null | Southampton | 1 |\n", - "| | Abraham Augus… | | | | | | | |\n", - "| 9 | Abrahim, Mrs. 
Joseph | female | 0.22338 | … | 7.22920 | null | Cherbourg | 1 |\n", - "| | (Sophie H… | | | | | | | |\n", - "+-----+-----------------------+--------+---------+---+----------+-------+---------------+----------+" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "execution_count": null, "source": [ "from safeds.data.tabular.transformation import RangeScaler\n", "\n", - "scaler = RangeScaler(selector=\"age\", min_=0.0, max_=1.0).fit(titanic)\n", + "scaler = RangeScaler(selector=\"age\", min=0.0, max=1.0).fit(titanic)\n", "scaler.transform(titanic_slice)" ] }, diff --git a/mkdocs.yml b/mkdocs.yml index 343a4d52c..527e9e50c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -84,6 +84,7 @@ plugins: show_signature: false show_symbol_type_heading: true show_symbol_type_toc: true + summary: true - gen-files: scripts: - docs/reference/generate_reference_pages.py diff --git a/pyproject.toml b/pyproject.toml index 1b7df81e4..f2bf9aad1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,6 +146,8 @@ ignore = [ "FBT001", # boolean-default-value-in-function-definition (we leave it to the call-site) "FBT002", + # builtin-argument-shadowing (we want readable parameter names in our API) + "A002", # builtin-attribute-shadowing (not an issue) "A003", # implicit-return (can add a return even though all cases are covered) diff --git a/src/safeds/_typing/__init__.py b/src/safeds/_typing/__init__.py index b418fe5f9..3e9087645 100644 --- a/src/safeds/_typing/__init__.py +++ b/src/safeds/_typing/__init__.py @@ -6,21 +6,23 @@ from safeds.data.tabular.containers import Cell +# Literals _NumericLiteral: TypeAlias = int | float | Decimal _TemporalLiteral: TypeAlias = datetime.date | datetime.time | datetime.datetime | datetime.timedelta _PythonLiteral: TypeAlias = _NumericLiteral | bool | str | bytes | _TemporalLiteral + +# Convertible to cell (we cannot restrict `Cell`, because `Row.get_cell` returns a `Cell[Any]`) 
_ConvertibleToCell: TypeAlias = _PythonLiteral | Cell | None -_BooleanCell: TypeAlias = Cell[bool | None] -# We cannot restrict `Cell`, because `Row.get_cell` returns a `Cell[Any]`. _ConvertibleToBooleanCell: TypeAlias = bool | Cell | None _ConvertibleToIntCell: TypeAlias = int | Cell | None +_ConvertibleToStringCell: TypeAlias = str | Cell | None __all__ = [ - "_BooleanCell", "_ConvertibleToBooleanCell", "_ConvertibleToCell", "_ConvertibleToIntCell", + "_ConvertibleToStringCell", "_NumericLiteral", "_PythonLiteral", "_TemporalLiteral", diff --git a/src/safeds/_utils/__init__.py b/src/safeds/_utils/__init__.py index fc3d501d4..ca7eabfa8 100644 --- a/src/safeds/_utils/__init__.py +++ b/src/safeds/_utils/__init__.py @@ -10,16 +10,18 @@ from ._lazy import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema from ._plotting import _figure_to_image from ._random import _get_random_seed + from ._string import _get_similar_strings apipkg.initpkg( __name__, { "_compute_duplicates": "._collections:_compute_duplicates", - "_structural_hash": "._hashing:_structural_hash", - "_safe_collect_lazy_frame": "._lazy:_safe_collect_lazy_frame", - "_safe_collect_lazy_frame_schema": "._lazy:_safe_collect_lazy_frame_schema", "_figure_to_image": "._plotting:_figure_to_image", "_get_random_seed": "._random:_get_random_seed", + "_get_similar_strings": "._string:_get_similar_strings", + "_safe_collect_lazy_frame": "._lazy:_safe_collect_lazy_frame", + "_safe_collect_lazy_frame_schema": "._lazy:_safe_collect_lazy_frame_schema", + "_structural_hash": "._hashing:_structural_hash", }, ) @@ -27,6 +29,7 @@ "_compute_duplicates", "_figure_to_image", "_get_random_seed", + "_get_similar_strings", "_safe_collect_lazy_frame", "_safe_collect_lazy_frame_schema", "_structural_hash", diff --git a/src/safeds/_utils/_string.py b/src/safeds/_utils/_string.py new file mode 100644 index 000000000..6aadadce8 --- /dev/null +++ b/src/safeds/_utils/_string.py @@ -0,0 +1,17 @@ +from __future__ import annotations 
+ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterable + + +def _get_similar_strings(string: str, valid_strings: Iterable[str]) -> list[str]: + from difflib import get_close_matches + + close_matches = get_close_matches(string, valid_strings, n=3) + + if close_matches and close_matches[0] == string: + return close_matches[0:1] + else: + return close_matches diff --git a/src/safeds/_validation/__init__.py b/src/safeds/_validation/__init__.py index efb49c1b6..004386c24 100644 --- a/src/safeds/_validation/__init__.py +++ b/src/safeds/_validation/__init__.py @@ -13,6 +13,8 @@ from ._check_indices_module import _check_indices from ._check_row_counts_are_equal_module import _check_row_counts_are_equal from ._check_schema_module import _check_schema + from ._check_time_zone_module import _check_time_zone + from ._convert_and_check_datetime_format_module import _convert_and_check_datetime_format from ._normalize_and_check_file_path_module import _normalize_and_check_file_path apipkg.initpkg( @@ -29,6 +31,8 @@ "_check_indices": "._check_indices_module:_check_indices", "_check_row_counts_are_equal": "._check_row_counts_are_equal_module:_check_row_counts_are_equal", "_check_schema": "._check_schema_module:_check_schema", + "_check_time_zone": "._check_time_zone_module:_check_time_zone", + "_convert_and_check_datetime_format": "._convert_and_check_datetime_format_module:_convert_and_check_datetime_format", "_normalize_and_check_file_path": "._normalize_and_check_file_path_module:_normalize_and_check_file_path", }, ) @@ -45,5 +49,7 @@ "_check_indices", "_check_row_counts_are_equal", "_check_schema", + "_check_time_zone", + "_convert_and_check_datetime_format", "_normalize_and_check_file_path", ] diff --git a/src/safeds/_validation/_check_columns_exist_module.py b/src/safeds/_validation/_check_columns_exist_module.py index 7e1724921..d426f148b 100644 --- a/src/safeds/_validation/_check_columns_exist_module.py +++ 
b/src/safeds/_validation/_check_columns_exist_module.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING +from safeds._utils import _get_similar_strings from safeds.exceptions import ColumnNotFoundError if TYPE_CHECKING: @@ -52,20 +53,9 @@ def _build_error_message(schema: Schema, unknown_names: list[str]) -> str: result = "Could not find column(s):" for unknown_name in unknown_names: - similar_columns = _get_similar_column_names(schema, unknown_name) + similar_columns = _get_similar_strings(unknown_name, schema.column_names) result += f"\n - '{unknown_name}'" if similar_columns: result += f": Did you mean one of {similar_columns}?" return result - - -def _get_similar_column_names(schema: Schema, name: str) -> list[str]: - from difflib import get_close_matches - - close_matches = get_close_matches(name, schema.column_names, n=3) - - if close_matches and close_matches[0] == name: - return close_matches[0:1] - else: - return close_matches diff --git a/src/safeds/_validation/_check_time_zone_module.py b/src/safeds/_validation/_check_time_zone_module.py new file mode 100644 index 000000000..6f134fd4b --- /dev/null +++ b/src/safeds/_validation/_check_time_zone_module.py @@ -0,0 +1,34 @@ +import zoneinfo + +from safeds._utils import _get_similar_strings + +_VALID_TZ_IDENTIFIERS = zoneinfo.available_timezones() + + +def _check_time_zone(time_zone: str | None) -> None: + """ + Check if the time zone is valid. + + Parameters + ---------- + time_zone: + The time zone to check. + + Raises + ------ + ValueError + If the time zone is invalid. + """ + if time_zone is not None and time_zone not in _VALID_TZ_IDENTIFIERS: + message = _build_error_message(time_zone) + raise ValueError(message) + + +def _build_error_message(time_zone: str) -> str: + result = f"Invalid time zone '{time_zone}'." + + similar_time_zones = _get_similar_strings(time_zone, _VALID_TZ_IDENTIFIERS) + if similar_time_zones: # pragma: no cover + result += f" Did you mean one of {similar_time_zones}?" 
+ + return result diff --git a/src/safeds/_validation/_convert_and_check_datetime_format_module.py b/src/safeds/_validation/_convert_and_check_datetime_format_module.py new file mode 100644 index 000000000..91b40b191 --- /dev/null +++ b/src/safeds/_validation/_convert_and_check_datetime_format_module.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from safeds._utils import _get_similar_strings + +if TYPE_CHECKING: + from collections.abc import Iterable + +_DATE_REPLACEMENTS = { + # Year + "Y": "Y", + "_Y": "_Y", + "^Y": "-Y", + "Y99": "y", + "_Y99": "_y", + "^Y99": "-y", + # Month + "M": "m", + "_M": "_m", + "^M": "-m", + "M-full": "B", + "M-short": "b", + # Week + "W": "V", + "_W": "_V", + "^W": "-V", + # Day + "D": "d", + "_D": "_d", + "^D": "-d", + "DOW": "u", + "DOW-full": "A", + "DOW-short": "a", + "DOY": "j", + "_DOY": "_j", + "^DOY": "-j", +} + +_TIME_REPLACEMENTS = { + # Hour + "h": "H", + "_h": "_H", + "^h": "-H", + "h12": "I", + "_h12": "_I", + "^h12": "-I", + # Minute + "m": "M", + "_m": "_M", + "^m": "-M", + # Second + "s": "S", + "_s": "_S", + "^s": "-S", + # Fractional seconds + ".f": ".f", + "ms": "3f", + "us": "6f", + "ns": "9f", + # AM/PM + "AM/PM": "p", + "am/pm": "P", +} + +_DATETIME_REPLACEMENTS = { + # Date and time replacements are also valid for datetime + **_DATE_REPLACEMENTS, + **_TIME_REPLACEMENTS, + # Timezone + "z": "z", + ":z": ":z", + # UNIX timestamp + "u": "s", +} + +_DATETIME_REPLACEMENTS_WHEN_PARSING = { + **_DATETIME_REPLACEMENTS, + # Allow omission of minutes for the timezone offset + "z": "#z", + ":z": "#z", +} + + +def _convert_and_check_datetime_format( + format_: str, + type_: Literal["datetime", "date", "time"], + used_for_parsing: bool, +) -> str: + """ + Convert our datetime format string to a format string understood by polars and check for errors. + + Parameters + ---------- + format_: + The datetime format to convert. 
+ type_: + Whether format is for a datetime, date, or time. + used_for_parsing: + Whether the format is used for parsing. + + Returns + ------- + converted_format: + The converted datetime format. + + Raises + ------ + ValueError + If the format is invalid. + """ + replacements = _get_replacements(type_, used_for_parsing) + converted_format = "" + index = 0 + + while index < len(format_): + char = format_[index] + + # Escaped characters + if char == "\\" and char_at(format_, index + 1) == "\\": + converted_format += "\\" + index += 2 + elif char == "\\" and char_at(format_, index + 1) == "{": + converted_format += "{" + index += 2 + # Characters that need to be escaped for rust's chrono crate + elif char == "\n": + converted_format += "%n" + index += 1 + elif char == "\t": + converted_format += "%t" + index += 1 + elif char == "%": + converted_format += "%%" + index += 1 + # Template expression + elif char == "{": + end_index = format_.find("}", index) + if end_index == -1: + message = f"Unclosed specifier at index {index}." 
+ raise ValueError(message) + + expression = format_[index + 1 : end_index] + converted_format += _convert_and_check_template_expression(expression, type_, replacements) + index = end_index + 1 + # Regular characters + else: + converted_format += char + index += 1 + + return converted_format + + +def _get_replacements( + type_: Literal["datetime", "date", "time"], + used_for_parsing: bool, +) -> dict[str, str]: + if type_ == "datetime": + return _DATETIME_REPLACEMENTS_WHEN_PARSING if used_for_parsing else _DATETIME_REPLACEMENTS + elif type_ == "date": + return _DATE_REPLACEMENTS + else: + return _TIME_REPLACEMENTS + + +def char_at(string: str, i: int) -> str | None: + if i >= len(string): + return None + return string[i] + + +def _convert_and_check_template_expression( + expression: str, + type_: str, + replacements: dict[str, str], +) -> str: + if expression in replacements: + return "%" + replacements[expression] + + # Unknown specifier + message = _build_error_message(expression, type_, replacements.keys()) + raise ValueError(message) + + +def _build_error_message( + expression: str, + type_: str, + valid_expressions: Iterable[str], +) -> str: + result = f"Invalid specifier '{expression}' for type {type_}." + + similar_expressions = _get_similar_strings(expression, valid_expressions) + if similar_expressions: # pragma: no cover + result += f" Did you mean one of {similar_expressions}?" + + return result diff --git a/src/safeds/_validation/_normalize_and_check_file_path_module.py b/src/safeds/_validation/_normalize_and_check_file_path_module.py index 2ba054dfd..c5fe8e8c8 100644 --- a/src/safeds/_validation/_normalize_and_check_file_path_module.py +++ b/src/safeds/_validation/_normalize_and_check_file_path_module.py @@ -15,12 +15,12 @@ def _normalize_and_check_file_path( check_if_file_exists: bool = False, ) -> Path: """ - Check whether the provided path is a valid file path and normalize it. + Normalize a path and check its validity. 
Parameters ---------- path: - Path to check and normalize. + Path to normalize and check. canonical_file_extension: If the path has no extension, this extension will be added. It should include the leading dot. valid_file_extensions: diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index 5c0499ed0..5512b0b1e 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -8,9 +8,7 @@ from ._cell import Cell from ._column import Column from ._row import Row - from ._string_cell import StringCell from ._table import Table - from ._temporal_cell import TemporalCell apipkg.initpkg( __name__, @@ -18,8 +16,6 @@ "Cell": "._cell:Cell", "Column": "._column:Column", "Row": "._row:Row", - "StringCell": "._string_cell:StringCell", - "TemporalCell": "._temporal_cell:TemporalCell", "Table": "._table:Table", }, ) @@ -28,7 +24,5 @@ "Cell", "Column", "Row", - "StringCell", "Table", - "TemporalCell", ] diff --git a/src/safeds/data/tabular/containers/_cell.py b/src/safeds/data/tabular/containers/_cell.py index 0cc022b45..202af7661 100644 --- a/src/safeds/data/tabular/containers/_cell.py +++ b/src/safeds/data/tabular/containers/_cell.py @@ -3,23 +3,22 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from safeds._validation import _check_time_zone + if TYPE_CHECKING: import datetime as python_datetime import polars as pl from safeds._typing import ( - _BooleanCell, _ConvertibleToBooleanCell, _ConvertibleToCell, _ConvertibleToIntCell, _PythonLiteral, ) + from safeds.data.tabular.query import DatetimeOperations, DurationOperations, MathOperations, StringOperations from safeds.data.tabular.typing import ColumnType - from ._string_cell import StringCell - from ._temporal_cell import TemporalCell - T_co = TypeVar("T_co", covariant=True) P = TypeVar("P") @@ -28,7 +27,16 @@ class Cell(ABC, Generic[T_co]): """ A single value in a table. 
- You only need to interact with this class in callbacks passed to higher-order functions. + You only need to interact with this class in callbacks passed to higher-order functions. Most operations are grouped + into namespaces, which are accessed through the following attributes: + + - `dt`: Operations on datetime/date/time values + - `dur`: Operations on durations + - `math`: Mathematical operations on numbers + - `str`: Operations on strings + + This class only has methods that are not specific to a data type (e.g. `cast`), methods with corresponding + operators (e.g. `add` for `+`), and static methods to create new cells. """ # ------------------------------------------------------------------------------------------------------------------ @@ -128,9 +136,9 @@ def date( return _LazyCell( pl.date( - year=_unwrap(year), - month=_unwrap(month), - day=_unwrap(day), + year=_to_polars_expression(year), + month=_to_polars_expression(month), + day=_to_polars_expression(day), ), ) @@ -144,6 +152,7 @@ def datetime( minute: _ConvertibleToIntCell = 0, second: _ConvertibleToIntCell = 0, microsecond: _ConvertibleToIntCell = 0, + time_zone: str | None = None, ) -> Cell[python_datetime.datetime | None]: """ Create a cell with a datetime. @@ -166,6 +175,10 @@ def datetime( The second. Must be between 0 and 59. microsecond: The microsecond. Must be between 0 and 999,999. + time_zone: + The time zone. If None, values are assumed to be in local time. This is different from setting the time zone + to `"UTC"`. Any TZ identifier defined in the + [tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is valid. 
Returns ------- @@ -202,18 +215,31 @@ def datetime( from ._lazy_cell import _LazyCell # circular import - pl_year = _unwrap(year) - pl_month = _unwrap(month) - pl_day = _unwrap(day) - pl_hour = _unwrap(hour) - pl_minute = _unwrap(minute) - pl_second = _unwrap(second) - pl_microsecond = _unwrap(microsecond) + _check_time_zone(time_zone) + + pl_year = _to_polars_expression(year) + pl_month = _to_polars_expression(month) + pl_day = _to_polars_expression(day) + pl_hour = _to_polars_expression(hour) + pl_minute = _to_polars_expression(minute) + pl_second = _to_polars_expression(second) + pl_microsecond = _to_polars_expression(microsecond) # By default, microseconds overflow into seconds return _LazyCell( pl.when(pl_microsecond <= 999_999) - .then(pl.datetime(pl_year, pl_month, pl_day, pl_hour, pl_minute, pl_second, pl_microsecond)) + .then( + pl.datetime( + pl_year, + pl_month, + pl_day, + pl_hour, + pl_minute, + pl_second, + pl_microsecond, + time_zone=time_zone, + ), + ) .otherwise(None), ) @@ -287,13 +313,13 @@ def duration( return _LazyCell( pl.duration( - weeks=_unwrap(weeks), - days=_unwrap(days), - hours=_unwrap(hours), - minutes=_unwrap(minutes), - seconds=_unwrap(seconds), - milliseconds=_unwrap(milliseconds), - microseconds=_unwrap(microseconds), + weeks=_to_polars_expression(weeks), + days=_to_polars_expression(days), + hours=_to_polars_expression(hours), + minutes=_to_polars_expression(minutes), + seconds=_to_polars_expression(seconds), + milliseconds=_to_polars_expression(milliseconds), + microseconds=_to_polars_expression(microseconds), ), ) @@ -356,10 +382,10 @@ def time( from ._lazy_cell import _LazyCell # circular import - pl_hour = _unwrap(hour) - pl_minute = _unwrap(minute) - pl_second = _unwrap(second) - pl_microsecond = _unwrap(microsecond) + pl_hour = _to_polars_expression(hour) + pl_minute = _to_polars_expression(minute) + pl_second = _to_polars_expression(second) + pl_microsecond = _to_polars_expression(microsecond) # By default, microseconds 
overflow into seconds return _LazyCell( @@ -391,7 +417,7 @@ def first_not_none(cells: list[Cell[P]]) -> Cell[P | None]: if not cells: return Cell.constant(None) - return _LazyCell(pl.coalesce([_unwrap(cell) for cell in cells])) + return _LazyCell(pl.coalesce([_to_polars_expression(cell) for cell in cells])) # ------------------------------------------------------------------------------------------------------------------ # Dunder methods @@ -400,46 +426,46 @@ def first_not_none(cells: list[Cell[P]]) -> Cell[P | None]: # "Boolean" operators (actually bitwise) ----------------------------------- @abstractmethod - def __invert__(self) -> _BooleanCell: ... + def __invert__(self) -> Cell[bool | None]: ... @abstractmethod - def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __and__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __rand__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __or__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __ror__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __ror__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __xor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... @abstractmethod - def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: ... + def __rxor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: ... 
# Comparison --------------------------------------------------------------- @abstractmethod - def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __eq__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] ... @abstractmethod - def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __ge__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __gt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __le__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: ... + def __lt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: ... @abstractmethod - def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] + def __ne__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] ... # Numeric operators -------------------------------------------------------- @@ -521,58 +547,107 @@ def __str__(self) -> str: ... @property @abstractmethod - def str(self) -> StringCell: + def dt(self) -> DatetimeOperations: """ - Namespace for operations on strings. + Namespace for operations on datetime/date/time values. Examples -------- + >>> from datetime import datetime >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", ["hi", "hello"]) - >>> column.transform(lambda cell: cell.str.length()) + >>> column = Column("a", [datetime(2025, 1, 1), datetime(2024, 1, 1)]) + >>> column.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2025 | + | 2024 | + +------+ + """ + + @property + @abstractmethod + def dur(self) -> DurationOperations: + """ + Namespace for operations on durations. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(hours=1), timedelta(hours=2)]) + >>> column.transform(lambda cell: cell.dur.full_hours()) +-----+ | a | | --- | - | u32 | + | i64 | +=====+ + | 1 | | 2 | - | 5 | +-----+ """ @property @abstractmethod - def dt(self) -> TemporalCell: + def math(self) -> MathOperations: """ - Namespace for operations on temporal values. + Namespace for mathematical operations. Examples -------- - >>> import datetime + >>> from datetime import timedelta >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [datetime.datetime(2025, 1, 1), datetime.datetime(2024, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.year()) - +------+ - | a | - | --- | - | i32 | - +======+ - | 2025 | - | 2024 | - +------+ + >>> column = Column("a", [1, -2]) + >>> column.transform(lambda cell: cell.math.abs()) + +-----+ + | a | + | --- | + | i64 | + +=====+ + | 1 | + | 2 | + +-----+ + """ + + @property + @abstractmethod + def str(self) -> StringOperations: + """ + Namespace for operations on strings. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", ["hi", "hello"]) + >>> column.transform(lambda cell: cell.str.length()) + +-----+ + | a | + | --- | + | u32 | + +=====+ + | 2 | + | 5 | + +-----+ """ # ------------------------------------------------------------------------------------------------------------------ # Boolean operations # ------------------------------------------------------------------------------------------------------------------ - def not_(self) -> _BooleanCell: + def not_(self) -> Cell[bool | None]: """ - Negate a boolean. This is equivalent to the `~` operator. + Negate a Boolean. This is equivalent to the `~` operator. Do **not** use the `not` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. 
+ Returns + ------- + cell: + The result of the Boolean negation. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -601,13 +676,23 @@ def not_(self) -> _BooleanCell: """ return self.__invert__() - def and_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def and_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean AND operation. This is equivalent to the `&` operator. + Perform a Boolean AND operation. This is equivalent to the `&` operator. Do **not** use the `and` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the conjunction. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -636,12 +721,22 @@ def and_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ return self.__and__(other) - def or_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def or_(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean OR operation. This is equivalent to the `|` operator. + Perform a Boolean OR operation. This is equivalent to the `|` operator. Do **not** use the `or` operator. Its behavior cannot be overwritten in Python, so it will not work as expected. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the disjunction. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -670,9 +765,19 @@ def or_(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: """ return self.__or__(other) - def xor(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: + def xor(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: """ - Perform a boolean XOR operation. This is equivalent to the `^` operator. + Perform a Boolean XOR operation. This is equivalent to the `^` operator. 
+ + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the exclusive or. Examples -------- @@ -706,73 +811,15 @@ def xor(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: # Numeric operations # ------------------------------------------------------------------------------------------------------------------ - def abs(self) -> Cell: - """ - Get the absolute value. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1, -2, None]) - >>> column.transform(lambda cell: cell.abs()) - +------+ - | a | - | --- | - | i64 | - +======+ - | 1 | - | 2 | - | null | - +------+ - """ - return self.__abs__() - - def ceil(self) -> Cell: - """ - Round up to the nearest integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1.1, 3.0, None]) - >>> column.transform(lambda cell: cell.ceil()) - +---------+ - | a | - | --- | - | f64 | - +=========+ - | 2.00000 | - | 3.00000 | - | null | - +---------+ - """ - return self.__ceil__() - - def floor(self) -> Cell: - """ - Round down to the nearest integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("a", [1.1, 3.0, None]) - >>> column.transform(lambda cell: cell.floor()) - +---------+ - | a | - | --- | - | f64 | - +=========+ - | 1.00000 | - | 3.00000 | - | null | - +---------+ - """ - return self.__floor__() - def neg(self) -> Cell: """ Negate the value. This is equivalent to the unary `-` operator. + Returns + ------- + cell: + The negated value. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -805,6 +852,16 @@ def add(self, other: _ConvertibleToCell) -> Cell: """ Add a value. This is equivalent to the `+` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the addition. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -837,6 +894,16 @@ def div(self, other: _ConvertibleToCell) -> Cell: """ Divide by a value. This is equivalent to the `/` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the division. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -869,6 +936,16 @@ def mod(self, other: _ConvertibleToCell) -> Cell: """ Perform a modulo operation. This is equivalent to the `%` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the modulo operation. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -903,6 +980,16 @@ def mul(self, other: _ConvertibleToCell) -> Cell: """ Multiply by a value. This is equivalent to the `*` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the multiplication. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -935,6 +1022,16 @@ def pow(self, other: _ConvertibleToCell) -> Cell: """ Raise to a power. This is equivalent to the `**` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the exponentiation. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -950,7 +1047,6 @@ def pow(self, other: _ConvertibleToCell) -> Cell: | null | +------+ - >>> column.transform(lambda cell: cell ** 3) +------+ | a | @@ -968,6 +1064,16 @@ def sub(self, other: _ConvertibleToCell) -> Cell: """ Subtract a value. This is equivalent to the binary `-` operator. + Parameters + ---------- + other: + The right operand. + + Returns + ------- + cell: + The result of the subtraction. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1006,7 +1112,7 @@ def eq( other: _ConvertibleToCell, *, propagate_missing_values: bool = True, - ) -> _BooleanCell: + ) -> Cell[bool | None]: """ Check if equal to a value. The default behavior is equivalent to the `==` operator. @@ -1018,6 +1124,18 @@ def eq( - If `propagate_missing_values` is `False`, `None` will be treated as a regular value. Here, `None == None` is `True`. This behavior is useful, if you want to work with missing values, e.g. to filter them out. + Parameters + ---------- + other: + The value to compare to. + propagate_missing_values: + Whether to propagate missing values. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1062,7 +1180,7 @@ def neq( other: _ConvertibleToCell, *, propagate_missing_values: bool = True, - ) -> _BooleanCell: + ) -> Cell[bool | None]: """ Check if not equal to a value. The default behavior is equivalent to the `!=` operator. @@ -1081,6 +1199,11 @@ def neq( propagate_missing_values: Whether to propagate missing values. + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1119,10 +1242,20 @@ def neq( +-------+ """ - def ge(self, other: _ConvertibleToCell) -> _BooleanCell: + def ge(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than or equal to a value. This is equivalent to the `>=` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1151,10 +1284,20 @@ def ge(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__ge__(other) - def gt(self, other: _ConvertibleToCell) -> _BooleanCell: + def gt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if greater than a value. This is equivalent to the `>` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1183,10 +1326,20 @@ def gt(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__gt__(other) - def le(self, other: _ConvertibleToCell) -> _BooleanCell: + def le(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than or equal to a value. This is equivalent to the `<=` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1215,10 +1368,20 @@ def le(self, other: _ConvertibleToCell) -> _BooleanCell: """ return self.__le__(other) - def lt(self, other: _ConvertibleToCell) -> _BooleanCell: + def lt(self, other: _ConvertibleToCell) -> Cell[bool | None]: """ Check if less than a value. This is equivalent to the `<` operator. + Parameters + ---------- + other: + The value to compare to. + + Returns + ------- + cell: + The result of the comparison. + Examples -------- >>> from safeds.data.tabular.containers import Column @@ -1252,13 +1415,13 @@ def lt(self, other: _ConvertibleToCell) -> _BooleanCell: # ------------------------------------------------------------------------------------------------------------------ @abstractmethod - def cast(self, type_: ColumnType) -> Cell: + def cast(self, type: ColumnType) -> Cell: """ Cast the cell to a different type. 
Parameters ---------- - type_: + type: The type to cast to. Returns @@ -1301,7 +1464,7 @@ def _equals(self, other: object) -> bool: """ -def _unwrap(cell_proxy: _ConvertibleToCell) -> pl.Expr: +def _to_polars_expression(cell_proxy: _ConvertibleToCell) -> pl.Expr: import polars as pl if isinstance(cell_proxy, Cell): diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 6e4f86fc1..85edd4ba5 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -3,7 +3,7 @@ from collections.abc import Callable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeVar, overload -from safeds._utils import _structural_hash +from safeds._utils import _safe_collect_lazy_frame, _safe_collect_lazy_frame_schema, _structural_hash from safeds._validation import ( _check_column_has_no_missing_values, _check_column_is_numeric, @@ -16,9 +16,8 @@ from ._lazy_cell import _LazyCell if TYPE_CHECKING: - from polars import Series + import polars as pl - from safeds._typing import _BooleanCell from safeds.data.tabular.typing import ColumnType from safeds.exceptions import ( # noqa: F401 ColumnTypeError, @@ -45,7 +44,7 @@ class Column(Sequence[T_co]): The name of the column. data: The data of the column. - type_: + type: The type of the column. If `None` (default), the type is inferred from the data. 
Examples @@ -63,7 +62,7 @@ class Column(Sequence[T_co]): +-----+ >>> from safeds.data.tabular.typing import ColumnType - >>> Column("a", [1, 2, 3], type_=ColumnType.string()) + >>> Column("a", [1, 2, 3], type=ColumnType.string()) +-----+ | a | | --- | @@ -80,9 +79,19 @@ class Column(Sequence[T_co]): # ------------------------------------------------------------------------------------------------------------------ @staticmethod - def _from_polars_series(data: Series) -> Column: + def _from_polars_lazy_frame(name: str, data: pl.LazyFrame) -> Column: result = object.__new__(Column) - result._series = data + result._name = name + result._lazy_frame = data.select(name) + result.__series_cache = None + return result + + @staticmethod + def _from_polars_series(data: pl.Series) -> Column: + result = object.__new__(Column) + result._name = data.name + result._lazy_frame = data.to_frame().lazy() + result.__series_cache = data return result # ------------------------------------------------------------------------------------------------------------------ @@ -94,15 +103,17 @@ def __init__( name: str, data: Sequence[T_co], *, - type_: ColumnType | None = None, + type: ColumnType | None = None, ) -> None: import polars as pl # Preprocessing - dtype = None if type_ is None else type_._polars_data_type + dtype = None if type is None else type._polars_data_type # Implementation - self._series: pl.Series = pl.Series(name, data, dtype=dtype, strict=False) + self._name: str = name + self.__series_cache: pl.Series | None = pl.Series(name, data, dtype=dtype, strict=False) + self._lazy_frame: pl.LazyFrame = self.__series_cache.to_frame().lazy() def __contains__(self, value: object) -> bool: import polars as pl @@ -129,8 +140,11 @@ def __getitem__(self, index: slice) -> Column[T_co]: ... 
def __getitem__(self, index: int | slice) -> T_co | Column[T_co]: if isinstance(index, int): return self.get_value(index) - else: - return self._from_polars_series(self._series.__getitem__(index)) + + try: + return self._from_polars_lazy_frame(self.name, self._lazy_frame[index]) + except ValueError: + return self._from_polars_series(self._series[index]) def __hash__(self) -> int: return _structural_hash( @@ -158,6 +172,13 @@ def __str__(self) -> str: # Properties # ------------------------------------------------------------------------------------------------------------------ + @property + def _series(self) -> pl.Series: + if self.__series_cache is None: + self.__series_cache = _safe_collect_lazy_frame(self._lazy_frame).to_series() + + return self.__series_cache + @property def name(self) -> str: """ @@ -170,13 +191,15 @@ def name(self) -> str: >>> column.name 'a' """ - return self._series.name + return self._name @property def row_count(self) -> int: """ The number of rows. + **Note:** This operation must fully load the data into memory, which can be expensive. 
+ Examples -------- >>> from safeds.data.tabular.containers import Column @@ -213,7 +236,8 @@ def type(self) -> ColumnType: >>> column.type int64 """ - return _PolarsColumnType(self._series.dtype) + schema = _safe_collect_lazy_frame_schema(self._lazy_frame) + return _PolarsColumnType(schema.dtypes()[0]) # ------------------------------------------------------------------------------------------------------------------ # Value operations @@ -316,7 +340,8 @@ def get_value(self, index: int) -> T_co: """ _check_indices(self, index) - return self._series.__getitem__(index) + # Lazy containers do not allow indexed accesses + return self._series[index] # ------------------------------------------------------------------------------------------------------------------ # Reductions @@ -325,7 +350,7 @@ def get_value(self, index: int) -> T_co: @overload def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -333,14 +358,14 @@ def all( @overload def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... 
def all( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: @@ -394,14 +419,15 @@ def all( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.all(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() @overload def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -409,14 +435,14 @@ def any( @overload def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... def any( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: @@ -470,14 +496,15 @@ def any( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.any(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() @overload def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -485,14 +512,14 @@ def count_if( @overload def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> int | None: ... 
def count_if( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> int | None: @@ -535,11 +562,11 @@ def count_if( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression - series = self._series.to_frame().select(expression.alias(self.name)).get_column(self.name) + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + series = frame.to_series() - if ignore_unknown or series.null_count() == 0: + if ignore_unknown or not series.has_nulls(): return series.sum() else: return None @@ -547,7 +574,7 @@ def count_if( @overload def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> bool: ... @@ -555,14 +582,14 @@ def none( @overload def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool, ) -> bool | None: ... 
def none( self, - predicate: Callable[[Cell[T_co]], _BooleanCell], + predicate: Callable[[Cell[T_co]], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> bool | None: @@ -616,9 +643,10 @@ def none( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first expression = predicate(_LazyCell(pl.col(self.name)))._polars_expression.not_().all(ignore_nulls=ignore_unknown) - return self._series.to_frame().select(expression).item() + frame = _safe_collect_lazy_frame(self._lazy_frame.select(expression)) + + return frame.item() # ------------------------------------------------------------------------------------------------------------------ # Transformations @@ -655,7 +683,8 @@ def rename(self, new_name: str) -> Column[T_co]: | 3 | +-----+ """ - return self._from_polars_series(self._series.rename(new_name)) + result = self._lazy_frame.rename({self.name: new_name}) + return self._from_polars_lazy_frame(new_name, result) def transform( self, @@ -693,11 +722,10 @@ def transform( """ import polars as pl - # Expressions only work on data frames/lazy frames, so we wrap the polars Series first - expression = transformer(_LazyCell(pl.col(self.name)))._polars_expression - series = self._series.to_frame().with_columns(expression.alias(self.name)).get_column(self.name) + expression = transformer(_LazyCell(pl.col(self.name)))._polars_expression.alias(self.name) + result = self._lazy_frame.with_columns(expression) # with_columns always keeps number of rows - return self._from_polars_series(series) + return self._from_polars_lazy_frame(self.name, result) # ------------------------------------------------------------------------------------------------------------------ # Statistics @@ -1222,7 +1250,7 @@ def to_table(self) -> Table: """ from ._table import Table - return Table._from_polars_data_frame(self._series.to_frame()) + return Table._from_polars_lazy_frame(self._lazy_frame) # 
------------------------------------------------------------------------------------------------------------------ # IPython integration diff --git a/src/safeds/data/tabular/containers/_lazy_cell.py b/src/safeds/data/tabular/containers/_lazy_cell.py index e4f850537..6055be763 100644 --- a/src/safeds/data/tabular/containers/_lazy_cell.py +++ b/src/safeds/data/tabular/containers/_lazy_cell.py @@ -4,17 +4,15 @@ from safeds._utils import _structural_hash -from ._cell import Cell, _unwrap +from ._cell import Cell, _to_polars_expression if TYPE_CHECKING: import polars as pl - from safeds._typing import _BooleanCell, _ConvertibleToBooleanCell, _ConvertibleToCell + from safeds._typing import _ConvertibleToBooleanCell, _ConvertibleToCell + from safeds.data.tabular.query import DatetimeOperations, DurationOperations, MathOperations, StringOperations from safeds.data.tabular.typing import ColumnType - from ._string_cell import StringCell - from ._temporal_cell import TemporalCell - T = TypeVar("T") @@ -34,133 +32,133 @@ def __init__(self, expression: pl.Expr) -> None: # "Boolean" operators (actually bitwise) ----------------------------------- - def __invert__(self) -> _BooleanCell: + def __invert__(self) -> Cell[bool | None]: import polars as pl - return _wrap(self._expression.cast(pl.Boolean).__invert__()) + return _LazyCell(self._expression.cast(pl.Boolean).__invert__()) - def __and__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__and__(other)) + def __and__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__and__(other)) - def __rand__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__rand__(other)) + def __rand__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return 
_LazyCell(self._expression.__rand__(other)) - def __or__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__or__(other)) + def __or__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__or__(other)) - def __ror__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__ror__(other)) + def __ror__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__ror__(other)) - def __xor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__xor__(other)) + def __xor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__xor__(other)) - def __rxor__(self, other: _ConvertibleToBooleanCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__rxor__(other)) + def __rxor__(self, other: _ConvertibleToBooleanCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rxor__(other)) # Comparison --------------------------------------------------------------- - def __eq__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] - other = _unwrap(other) - return _wrap(self._expression.__eq__(other)) + def __eq__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] + other = _to_polars_expression(other) + return _LazyCell(self._expression.__eq__(other)) - def __ge__(self, other: _ConvertibleToCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__ge__(other)) + def __ge__(self, other: _ConvertibleToCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return 
_LazyCell(self._expression.__ge__(other)) - def __gt__(self, other: _ConvertibleToCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__gt__(other)) + def __gt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__gt__(other)) - def __le__(self, other: _ConvertibleToCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__le__(other)) + def __le__(self, other: _ConvertibleToCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__le__(other)) - def __lt__(self, other: _ConvertibleToCell) -> _BooleanCell: - other = _unwrap(other) - return _wrap(self._expression.__lt__(other)) + def __lt__(self, other: _ConvertibleToCell) -> Cell[bool | None]: + other = _to_polars_expression(other) + return _LazyCell(self._expression.__lt__(other)) - def __ne__(self, other: _ConvertibleToCell) -> _BooleanCell: # type: ignore[override] - other = _unwrap(other) - return _wrap(self._expression.__ne__(other)) + def __ne__(self, other: _ConvertibleToCell) -> Cell[bool | None]: # type: ignore[override] + other = _to_polars_expression(other) + return _LazyCell(self._expression.__ne__(other)) # Numeric operators -------------------------------------------------------- def __abs__(self) -> Cell: - return _wrap(self._expression.__abs__()) + return _LazyCell(self._expression.__abs__()) def __ceil__(self) -> Cell: - return _wrap(self._expression.ceil()) + return _LazyCell(self._expression.ceil()) def __floor__(self) -> Cell: - return _wrap(self._expression.floor()) + return _LazyCell(self._expression.floor()) def __neg__(self) -> Cell: - return _wrap(self._expression.__neg__()) + return _LazyCell(self._expression.__neg__()) def __pos__(self) -> Cell: - return _wrap(self._expression.__pos__()) + return _LazyCell(self._expression.__pos__()) def __add__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - 
return _wrap(self._expression.__add__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__add__(other)) def __radd__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__radd__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__radd__(other)) def __floordiv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__floordiv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__floordiv__(other)) def __rfloordiv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rfloordiv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rfloordiv__(other)) def __mod__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__mod__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__mod__(other)) def __rmod__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rmod__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rmod__(other)) def __mul__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__mul__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__mul__(other)) def __rmul__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rmul__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rmul__(other)) def __pow__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__pow__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__pow__(other)) def __rpow__(self, other: _ConvertibleToCell) -> Cell: - other = 
_unwrap(other) - return _wrap(self._expression.__rpow__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rpow__(other)) def __sub__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__sub__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__sub__(other)) def __rsub__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rsub__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rsub__(other)) def __truediv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__truediv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__truediv__(other)) def __rtruediv__(self, other: _ConvertibleToCell) -> Cell: - other = _unwrap(other) - return _wrap(self._expression.__rtruediv__(other)) + other = _to_polars_expression(other) + return _LazyCell(self._expression.__rtruediv__(other)) # Other -------------------------------------------------------------------- @@ -168,7 +166,7 @@ def __hash__(self) -> int: return _structural_hash(self._expression.meta.serialize()) def __repr__(self) -> str: - return self._expression.__repr__() + return f"_LazyCell({self._expression})" def __sizeof__(self) -> int: return self._expression.__sizeof__() @@ -181,42 +179,54 @@ def __str__(self) -> str: # ------------------------------------------------------------------------------------------------------------------ @property - def str(self) -> StringCell: - from ._lazy_string_cell import _LazyStringCell # circular import + def dt(self) -> DatetimeOperations: + from safeds.data.tabular.query._lazy_datetime_operations import _LazyDatetimeOperations # circular import - return _LazyStringCell(self._expression) + return _LazyDatetimeOperations(self._expression) @property - def dt(self) -> TemporalCell: - from ._lazy_temporal_cell 
import _LazyTemporalCell # circular import + def dur(self) -> DurationOperations: + from safeds.data.tabular.query._lazy_duration_operations import _LazyDurationOperations # circular import + + return _LazyDurationOperations(self._expression) - return _LazyTemporalCell(self._expression) + @property + def math(self) -> MathOperations: + from safeds.data.tabular.query._lazy_math_operations import _LazyMathOperations # circular import + + return _LazyMathOperations(self._expression) + + @property + def str(self) -> StringOperations: + from safeds.data.tabular.query._lazy_string_operations import _LazyStringOperations # circular import + + return _LazyStringOperations(self._expression) # ------------------------------------------------------------------------------------------------------------------ # Comparison operations # ------------------------------------------------------------------------------------------------------------------ - def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: - other = _unwrap(other) + def eq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: + other = _to_polars_expression(other) if propagate_missing_values: - return _wrap(self._expression.eq(other)) + return _LazyCell(self._expression.eq(other)) else: - return _wrap(self._expression.eq_missing(other)) + return _LazyCell(self._expression.eq_missing(other)) - def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> _BooleanCell: - other = _unwrap(other) + def neq(self, other: _ConvertibleToCell, *, propagate_missing_values: bool = True) -> Cell[bool | None]: + other = _to_polars_expression(other) if propagate_missing_values: - return _wrap(self._expression.ne(other)) + return _LazyCell(self._expression.ne(other)) else: - return _wrap(self._expression.ne_missing(other)) + return _LazyCell(self._expression.ne_missing(other)) # 
------------------------------------------------------------------------------------------------------------------ # Other # ------------------------------------------------------------------------------------------------------------------ - def cast(self, type_: ColumnType) -> Cell: - return _wrap(self._expression.cast(type_._polars_data_type)) + def cast(self, type: ColumnType) -> Cell: + return _LazyCell(self._expression.cast(type._polars_data_type)) # ------------------------------------------------------------------------------------------------------------------ # Internal @@ -232,7 +242,3 @@ def _equals(self, other: object) -> bool: if self is other: return True return self._expression.meta.eq(other._expression) - - -def _wrap(expression: pl.Expr) -> Cell: - return _LazyCell(expression) diff --git a/src/safeds/data/tabular/containers/_lazy_string_cell.py b/src/safeds/data/tabular/containers/_lazy_string_cell.py deleted file mode 100644 index 20f80e0fc..000000000 --- a/src/safeds/data/tabular/containers/_lazy_string_cell.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from safeds._utils import _structural_hash -from safeds._validation import _check_bounds, _ClosedBound - -from ._lazy_cell import _LazyCell -from ._string_cell import StringCell - -if TYPE_CHECKING: - import datetime - - import polars as pl - - from ._cell import Cell - - -class _LazyStringCell(StringCell): - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - def __init__(self, expression: pl.Expr) -> None: - self._expression: pl.Expr = expression - - def __hash__(self) -> int: - return _structural_hash(self._expression.meta.serialize()) - - def __sizeof__(self) -> int: - return self._expression.__sizeof__() - - # 
------------------------------------------------------------------------------------------------------------------ - # String operations - # ------------------------------------------------------------------------------------------------------------------ - - def contains(self, substring: str) -> Cell[bool]: - return _LazyCell(self._expression.str.contains(substring, literal=True)) - - def length(self, optimize_for_ascii: bool = False) -> Cell[int]: - if optimize_for_ascii: - return _LazyCell(self._expression.str.len_bytes()) - else: - return _LazyCell(self._expression.str.len_chars()) - - def ends_with(self, suffix: str) -> Cell[bool]: - return _LazyCell(self._expression.str.ends_with(suffix)) - - def index_of(self, substring: str) -> Cell[int | None]: - return _LazyCell(self._expression.str.find(substring, literal=True)) - - def replace(self, old: str, new: str) -> Cell[str]: - return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) - - def starts_with(self, prefix: str) -> Cell[bool]: - return _LazyCell(self._expression.str.starts_with(prefix)) - - def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: - _check_bounds("length", length, lower_bound=_ClosedBound(0)) - - return _LazyCell(self._expression.str.slice(start, length)) - - def to_date(self) -> Cell[datetime.date | None]: - return _LazyCell(self._expression.str.to_date(format="%F", strict=False)) - - def to_datetime(self) -> Cell[datetime.datetime | None]: - return _LazyCell(self._expression.str.to_datetime(format="%+", strict=False)) - - def to_int(self, *, base: int = 10) -> Cell[int | None]: - return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) - - def to_float(self) -> Cell[float | None]: - import polars as pl - - return _LazyCell(self._expression.cast(pl.Float64, strict=False)) - - def to_lowercase(self) -> Cell[str]: - return _LazyCell(self._expression.str.to_lowercase()) - - def to_uppercase(self) -> Cell[str]: - return 
_LazyCell(self._expression.str.to_uppercase()) - - def trim(self) -> Cell[str]: - return _LazyCell(self._expression.str.strip_chars()) - - def trim_end(self) -> Cell[str]: - return _LazyCell(self._expression.str.strip_chars_end()) - - def trim_start(self) -> Cell[str]: - return _LazyCell(self._expression.str.strip_chars_start()) - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyStringCell): - return NotImplemented - if self is other: - return True - return self._expression.meta.eq(other._expression.meta) diff --git a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py b/src/safeds/data/tabular/containers/_lazy_temporal_cell.py deleted file mode 100644 index 180ecb58c..000000000 --- a/src/safeds/data/tabular/containers/_lazy_temporal_cell.py +++ /dev/null @@ -1,124 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from safeds._utils import _structural_hash - -from ._lazy_cell import _LazyCell -from ._temporal_cell import TemporalCell - -if TYPE_CHECKING: - import polars as pl - - from ._cell import Cell - - -class _LazyTemporalCell(TemporalCell): - # ------------------------------------------------------------------------------------------------------------------ - # Dunder methods - # ------------------------------------------------------------------------------------------------------------------ - - def __init__(self, expression: pl.Expr) -> None: - self._expression: pl.Expr = expression - - def __hash__(self) -> int: - return _structural_hash(self._expression.meta.serialize()) - - def __sizeof__(self) -> int: - return self._expression.__sizeof__() - - # 
------------------------------------------------------------------------------------------------------------------ - # Temporal operations - # ------------------------------------------------------------------------------------------------------------------ - - def century(self) -> Cell[int]: - return _LazyCell(self._expression.dt.century()) - - def weekday(self) -> Cell[int]: - return _LazyCell(self._expression.dt.weekday()) - - def week(self) -> Cell[int]: - return _LazyCell(self._expression.dt.week()) - - def year(self) -> Cell[int]: - return _LazyCell(self._expression.dt.year()) - - def month(self) -> Cell[int]: - return _LazyCell(self._expression.dt.month()) - - def day(self) -> Cell[int]: - return _LazyCell(self._expression.dt.day()) - - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: - if not _check_format_string(format_string): - raise ValueError("Invalid format string") - return _LazyCell(self._expression.dt.to_string(format=format_string)) - - def date_to_string(self, format_string: str = "%F") -> Cell[str]: - if not _check_format_string(format_string): - # Fehler in _check_format_string - raise ValueError("Invalid format string") - return _LazyCell(self._expression.dt.to_string(format=format_string)) - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - def _equals(self, other: object) -> bool: - if not isinstance(other, _LazyTemporalCell): - return NotImplemented - if self is other: - return True - return self._expression.meta.eq(other._expression.meta) - - -def _check_format_string(format_string: str) -> bool: - valid_format_codes = { - "F": "the standard", - "a": "abbreviated weekday name", - "A": "full weekday name", - "w": "weekday as a decimal number", - "d": "day of the month as a zero-padded decimal number", - 
"b": "abbreviated month name", - "B": "full month name", - "m": "month as a zero-padded decimal number", - "y": "year without century as a zero-padded decimal number", - "Y": "year with century as a decimal number", - "H": "hour (24-hour clock) as a zero-padded decimal number", - "I": "hour (12-hour clock) as a zero-padded decimal number", - "p": "locale's equivalent of either AM or PM", - "M": "minute as a zero-padded decimal number", - "S": "second as a zero-padded decimal number", - "f": "microsecond as a zero-padded decimal number", - "z": "UTC offset in the form ±HHMM[SS[.ffffff]]", - "Z": "time zone name", - "j": "day of the year as a zero-padded decimal number", - "U": "week number of the year (Sunday as the first day of the week)", - "W": "week number of the year (Monday as the first day of the week)", - "c": "locale's appropriate date and time representation", - "x": "locale's appropriate date representation", - "X": "locale's appropriate time representation", - "%": "a literal '%' character", - } - - # Keep track of the positions in the string - i = 0 - n = len(format_string) - - # Iterate over each character in the format string - while i < n: - if format_string[i] == "%": - # Make sure there's at least one character following the '%' - if i + 1 < n: - code = format_string[i + 1] - # Check if the following character is a valid format code - if code not in valid_format_codes: - return False - i += 2 # Skip ahead past the format code - else: - # '%' is at the end of the string with no following format code - return False - else: - i += 1 # Continue to the next character - - return True diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index e3c23f0f9..d56c10b39 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -40,7 +40,6 @@ from torch import Tensor from torch.utils.data import DataLoader, Dataset - from safeds._typing import _BooleanCell 
from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.transformation import ( InvertibleTableTransformer, @@ -66,13 +65,11 @@ class Table: To create a `Table` call the constructor or use one of the following static methods: - | Method | Description | - | ---------------------------------------------------------------------------------- | -------------------------------------- | - | [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] | Create a table from a CSV file. | - | [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] | Create a table from a JSON file. | - | [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] | Create a table from a Parquet file. | - | [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. | - | [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. | + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file]: Create a table from a CSV file. + - [`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file]: Create a table from a JSON file. + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file]: Create a table from a Parquet file. + - [`from_columns`][safeds.data.tabular.containers._table.Table.from_columns]: Create a table from a list of columns. + - [`from_dict`][safeds.data.tabular.containers._table.Table.from_dict]: Create a table from a dictionary. 
Parameters ---------- @@ -142,22 +139,21 @@ def from_columns(columns: Column | list[Column]) -> Table: +-----+-----+ """ import polars as pl - from polars.exceptions import DuplicateError, ShapeError if isinstance(columns, Column): columns = [columns] + if len(columns) == 0: + return Table({}) - try: - return Table._from_polars_lazy_frame( - pl.LazyFrame([column._series for column in columns]), - ) - # polars already validates this, so we don't do it upfront (performance) - except DuplicateError: - _check_columns_dont_exist(Table({}), [column.name for column in columns]) - return Table({}) # pragma: no cover - except ShapeError: - _check_row_counts_are_equal(columns) - return Table({}) # pragma: no cover + _check_columns_dont_exist(Table({}), [column.name for column in columns]) + _check_row_counts_are_equal(columns) + + return Table._from_polars_lazy_frame( + pl.concat( + [column._lazy_frame for column in columns], + how="horizontal", + ), + ) @staticmethod def from_csv_file(path: str | Path, *, separator: str = ",") -> Table: @@ -198,8 +194,8 @@ def from_csv_file(path: str | Path, *, separator: str = ",") -> Table: Related ------- - - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] - - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] + - [`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file] + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file] """ import polars as pl @@ -282,8 +278,8 @@ def from_json_file(path: str | Path) -> Table: Related ------- - - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] - - [from_parquet_file][safeds.data.tabular.containers._table.Table.from_parquet_file] + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file] + - [`from_parquet_file`][safeds.data.tabular.containers._table.Table.from_parquet_file] """ import polars as pl @@ -329,8 +325,8 @@ def 
from_parquet_file(path: str | Path) -> Table: Related ------- - - [from_csv_file][safeds.data.tabular.containers._table.Table.from_csv_file] - - [from_json_file][safeds.data.tabular.containers._table.Table.from_json_file] + - [`from_csv_file`][safeds.data.tabular.containers._table.Table.from_csv_file] + - [`from_json_file`][safeds.data.tabular.containers._table.Table.from_json_file] """ import polars as pl @@ -466,6 +462,8 @@ def schema(self) -> Schema: """ The schema of the table, which is a mapping from column names to their types. + **Note:** This operation must compute the schema of the table, which can be expensive. + Examples -------- >>> from safeds.data.tabular.containers import Table @@ -491,10 +489,7 @@ def add_columns( """ Add columns to the table and return the result as a new table. - **Notes:** - - - The original table is not modified. - - This operation must fully load the data into memory, which can be expensive. + **Note:** The original table is not modified. Parameters ---------- @@ -531,11 +526,11 @@ def add_columns( Related ------- - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a column with values computed from other columns. 
- - [add_index_column][safeds.data.tabular.containers._table.Table.add_index_column] + - [`add_index_column`][safeds.data.tabular.containers._table.Table.add_index_column] """ - from polars.exceptions import DuplicateError, ShapeError + import polars as pl if isinstance(columns, Table): return self.add_tables_as_columns(columns) @@ -545,17 +540,18 @@ def add_columns( if len(columns) == 0: return self - try: - return Table._from_polars_data_frame( - self._data_frame.hstack([column._series for column in columns]), - ) - # polars already validates this, so we don't do it upfront (performance) - except DuplicateError: - _check_columns_dont_exist(self, [column.name for column in columns]) - return Table({}) # pragma: no cover - except ShapeError: - _check_row_counts_are_equal([self, *columns]) - return Table({}) # pragma: no cover + _check_columns_dont_exist(self, [column.name for column in columns]) + _check_row_counts_are_equal([self, *columns], ignore_entries_without_rows=True) + + return Table._from_polars_lazy_frame( + pl.concat( + [ + self._lazy_frame, + *[column._lazy_frame for column in columns], + ], + how="horizontal", + ), + ) def add_computed_column( self, @@ -601,10 +597,10 @@ def add_computed_column( Related ------- - - [add_columns][safeds.data.tabular.containers._table.Table.add_columns]: + - [`add_columns`][safeds.data.tabular.containers._table.Table.add_columns]: Add column objects to the table. - - [add_index_column][safeds.data.tabular.containers._table.Table.add_index_column] - - [transform_columns][safeds.data.tabular.containers._table.Table.transform_columns]: + - [`add_index_column`][safeds.data.tabular.containers._table.Table.add_index_column] + - [`transform_columns`][safeds.data.tabular.containers._table.Table.transform_columns]: Transform existing columns with a custom function. 
""" _check_columns_dont_exist(self, name) @@ -672,9 +668,9 @@ def add_index_column(self, name: str, *, first_index: int = 0) -> Table: Related ------- - - [add_columns][safeds.data.tabular.containers._table.Table.add_columns]: + - [`add_columns`][safeds.data.tabular.containers._table.Table.add_columns]: Add column objects to the table. - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a column with values computed from other columns. """ _check_columns_dont_exist(self, name) @@ -723,9 +719,7 @@ def get_column(self, name: str) -> Column: +-----+ """ _check_columns_exist(self, name) - return Column._from_polars_series( - _safe_collect_lazy_frame(self._lazy_frame.select(name)).get_column(name), - ) + return Column._from_polars_lazy_frame(name, self._lazy_frame) def get_column_type(self, name: str) -> ColumnType: """ @@ -838,10 +832,10 @@ def remove_columns( Related ------- - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. 
- - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ if isinstance(selector, str): selector = [selector] @@ -902,16 +896,16 @@ def remove_columns_with_missing_values( Related ------- - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [SimpleImputer][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`SimpleImputer`][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: Replace missing values with a constant value or a statistic of the column. - - [KNearestNeighborsImputer][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: + - [`KNearestNeighborsImputer`][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: Replace missing values with a value computed from the nearest neighbors. - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. - - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - [`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. 
- - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ import polars as pl @@ -962,11 +956,11 @@ def remove_non_numeric_columns(self) -> Table: Related ------- - - [select_columns][safeds.data.tabular.containers._table.Table.select_columns]: + - [`select_columns`][safeds.data.tabular.containers._table.Table.select_columns]: Keep only a subset of the columns. - - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - [`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] """ import polars.selectors as cs @@ -1089,6 +1083,8 @@ def replace_column( | 9 | 12 | 6 | +-----+-----+-----+ """ + import polars.selectors as cs + if isinstance(new_columns, Column): new_columns = [new_columns] elif isinstance(new_columns, Table): @@ -1107,15 +1103,14 @@ def replace_column( self._lazy_frame.with_columns(new_column._series.alias(old_name)).rename({old_name: new_column.name}), ) - import polars as pl - - index = self.column_names.index(old_name) + column_names = self.column_names + index = column_names.index(old_name) return Table._from_polars_lazy_frame( self._lazy_frame.select( - *[pl.col(name) for name in self.column_names[:index]], + cs.by_name(column_names[:index]), *[column._series for column in new_columns], - *[pl.col(name) for name in self.column_names[index + 1 :]], + cs.by_name(column_names[index + 1 :]), ), ) @@ -1160,10 +1155,10 @@ def select_columns( Related ------- - - [remove_columns][safeds.data.tabular.containers._table.Table.remove_columns]: + - 
[`remove_columns`][safeds.data.tabular.containers._table.Table.remove_columns]: Remove columns from the table by name. - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [remove_non_numeric_columns][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`remove_non_numeric_columns`][safeds.data.tabular.containers._table.Table.remove_non_numeric_columns] """ _check_columns_exist(self, selector) @@ -1238,9 +1233,9 @@ def transform_columns( Related ------- - - [add_computed_column][safeds.data.tabular.containers._table.Table.add_computed_column]: + - [`add_computed_column`][safeds.data.tabular.containers._table.Table.add_computed_column]: Add a new column that is computed from other columns. - - [transform_table][safeds.data.tabular.containers._table.Table.transform_table]: + - [`transform_table`][safeds.data.tabular.containers._table.Table.transform_table]: Transform the entire table with a fitted transformer. """ import polars as pl @@ -1280,7 +1275,7 @@ def transform_columns( @overload def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], *, ignore_unknown: Literal[True] = ..., ) -> int: ... @@ -1288,14 +1283,14 @@ def count_rows_if( @overload def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], *, ignore_unknown: bool, ) -> int | None: ... 
def count_rows_if( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], *, ignore_unknown: bool = True, ) -> int | None: @@ -1346,7 +1341,7 @@ def count_rows_if( def filter_rows( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], ) -> Table: """ Keep only rows that satisfy a condition and return the result as a new table. @@ -1378,11 +1373,11 @@ def filter_rows( Related ------- - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ mask = predicate(_LazyVectorizedRow(self)) @@ -1393,7 +1388,7 @@ def filter_rows( def filter_rows_by_column( self, name: str, - predicate: Callable[[Cell], _BooleanCell], + predicate: Callable[[Cell], Cell[bool | None]], ) -> Table: """ Keep only rows that satisfy a condition on a specific column and return the result as a new table. @@ -1432,11 +1427,11 @@ def filter_rows_by_column( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. 
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ _check_columns_exist(self, name) @@ -1475,12 +1470,12 @@ def remove_duplicate_rows(self) -> Table: Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. 
- - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ return Table._from_polars_lazy_frame( self._lazy_frame.unique(maintain_order=True), @@ -1488,7 +1483,7 @@ def remove_duplicate_rows(self) -> Table: def remove_rows( self, - predicate: Callable[[Row], _BooleanCell], + predicate: Callable[[Row], Cell[bool | None]], ) -> Table: """ Remove rows that satisfy a condition and return the result as a new table. @@ -1521,15 +1516,15 @@ def remove_rows( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`remove_rows_by_column`][safeds.data.tabular.containers._table.Table.remove_rows_by_column]: Remove rows that satisfy a condition on a specific column. 
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ mask = predicate(_LazyVectorizedRow(self)) @@ -1540,7 +1535,7 @@ def remove_rows( def remove_rows_by_column( self, name: str, - predicate: Callable[[Cell], _BooleanCell], + predicate: Callable[[Cell], Cell[bool | None]], ) -> Table: """ Remove rows that satisfy a condition on a specific column and return the result as a new table. @@ -1580,15 +1575,15 @@ def remove_rows_by_column( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_rows][safeds.data.tabular.containers._table.Table.remove_rows]: + - [`remove_rows`][safeds.data.tabular.containers._table.Table.remove_rows]: Remove rows that satisfy a condition. 
- - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ _check_columns_exist(self, name) @@ -1649,17 +1644,17 @@ def remove_rows_with_missing_values( Related ------- - - [remove_columns_with_missing_values][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] - - [SimpleImputer][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: + - [`remove_columns_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_columns_with_missing_values] + - [`SimpleImputer`][safeds.data.tabular.transformation._simple_imputer.SimpleImputer]: Replace missing values with a constant value or a statistic of the column. - - [KNearestNeighborsImputer][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: + - [`KNearestNeighborsImputer`][safeds.data.tabular.transformation._k_nearest_neighbors_imputer.KNearestNeighborsImputer]: Replace missing values with a value computed from the nearest neighbors. - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. 
- - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_outliers][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_outliers`][safeds.data.tabular.containers._table.Table.remove_rows_with_outliers] """ if isinstance(selector, list) and not selector: # polars panics in this case @@ -1734,12 +1729,12 @@ def remove_rows_with_outliers( Related ------- - - [filter_rows][safeds.data.tabular.containers._table.Table.filter_rows]: + - [`filter_rows`][safeds.data.tabular.containers._table.Table.filter_rows]: Keep only rows that satisfy a condition. - - [filter_rows_by_column][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: + - [`filter_rows_by_column`][safeds.data.tabular.containers._table.Table.filter_rows_by_column]: Keep only rows that satisfy a condition on a specific column. - - [remove_duplicate_rows][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] - - [remove_rows_with_missing_values][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] + - [`remove_duplicate_rows`][safeds.data.tabular.containers._table.Table.remove_duplicate_rows] + - [`remove_rows_with_missing_values`][safeds.data.tabular.containers._table.Table.remove_rows_with_missing_values] """ _check_bounds( "z_score_threshold", @@ -1908,7 +1903,7 @@ def sort_rows( Related ------- - - [sort_rows_by_column][safeds.data.tabular.containers._table.Table.sort_rows_by_column]: + - [`sort_rows_by_column`][safeds.data.tabular.containers._table.Table.sort_rows_by_column]: Sort the rows by a specific column. 
""" key = key_selector(_LazyVectorizedRow(self)) @@ -1966,7 +1961,7 @@ def sort_rows_by_column( Related ------- - - [sort_rows][safeds.data.tabular.containers._table.Table.sort_rows]: + - [`sort_rows`][safeds.data.tabular.containers._table.Table.sort_rows]: Sort the rows by a value computed from an entire row. """ _check_columns_exist(self, name) @@ -2104,7 +2099,7 @@ def add_tables_as_columns(self, others: Table | list[Table]) -> Table: Related ------- - - [add_tables_as_rows][safeds.data.tabular.containers._table.Table.add_tables_as_rows] + - [`add_tables_as_rows`][safeds.data.tabular.containers._table.Table.add_tables_as_rows] """ import polars as pl @@ -2161,7 +2156,7 @@ def add_tables_as_rows(self, others: Table | list[Table]) -> Table: Related ------- - - [add_tables_as_columns][safeds.data.tabular.containers._table.Table.add_tables_as_columns] + - [`add_tables_as_columns`][safeds.data.tabular.containers._table.Table.add_tables_as_columns] """ import polars as pl @@ -2210,7 +2205,7 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer >>> from safeds.data.tabular.containers import Table >>> from safeds.data.tabular.transformation import RangeScaler >>> table = Table({"a": [1, 2, 3]}) - >>> transformer, transformed_table = RangeScaler(min_=0, max_=1).fit_and_transform(table) + >>> transformer, transformed_table = RangeScaler(min=0, max=1).fit_and_transform(table) >>> transformed_table.inverse_transform_table(transformer) +---------+ | a | @@ -2224,7 +2219,7 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer Related ------- - - [transform_table][safeds.data.tabular.containers._table.Table.transform_table]: + - [`transform_table`][safeds.data.tabular.containers._table.Table.transform_table]: Transform the table with a fitted transformer. 
""" return fitted_transformer.inverse_transform(self) @@ -2406,7 +2401,7 @@ def transform_table(self, fitted_transformer: TableTransformer) -> Table: >>> from safeds.data.tabular.containers import Table >>> from safeds.data.tabular.transformation import RangeScaler >>> table = Table({"a": [1, 2, 3]}) - >>> transformer = RangeScaler(min_=0, max_=1).fit(table) + >>> transformer = RangeScaler(min=0, max=1).fit(table) >>> table.transform_table(transformer) +---------+ | a | @@ -2420,9 +2415,9 @@ def transform_table(self, fitted_transformer: TableTransformer) -> Table: Related ------- - - [inverse_transform_table][safeds.data.tabular.containers._table.Table.inverse_transform_table]: + - [`inverse_transform_table`][safeds.data.tabular.containers._table.Table.inverse_transform_table]: Inverse-transform the table with a fitted, invertible transformer. - - [transform_columns][safeds.data.tabular.containers._table.Table.transform_columns]: + - [`transform_columns`][safeds.data.tabular.containers._table.Table.transform_columns]: Transform columns with a custom function. 
""" return fitted_transformer.transform(self) @@ -2551,7 +2546,7 @@ def to_columns(self) -> list[Column]: >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> columns = table.to_columns() """ - return [Column._from_polars_series(column) for column in self._data_frame.get_columns()] + return [Column._from_polars_lazy_frame(name, self._lazy_frame) for name in self.column_names] def to_csv_file(self, path: str | Path) -> None: """ @@ -2578,8 +2573,8 @@ def to_csv_file(self, path: str | Path) -> None: Related ------- - - [to_json_file][safeds.data.tabular.containers._table.Table.to_json_file] - - [to_parquet_file][safeds.data.tabular.containers._table.Table.to_parquet_file] + - [`to_json_file`][safeds.data.tabular.containers._table.Table.to_json_file] + - [`to_parquet_file`][safeds.data.tabular.containers._table.Table.to_parquet_file] """ path = _normalize_and_check_file_path(path, ".csv", [".csv"]) path.parent.mkdir(parents=True, exist_ok=True) @@ -2590,9 +2585,11 @@ def to_dict(self) -> dict[str, list[Any]]: """ Return a dictionary that maps column names to column values. + **Note:** This operation must fully load the data into memory, which can be expensive. + Returns ------- - dict_: + dict: The dictionary representation of the table. 
Examples @@ -2634,8 +2631,8 @@ def to_json_file( Related ------- - - [to_csv_file][safeds.data.tabular.containers._table.Table.to_csv_file] - - [to_parquet_file][safeds.data.tabular.containers._table.Table.to_parquet_file] + - [`to_csv_file`][safeds.data.tabular.containers._table.Table.to_csv_file] + - [`to_parquet_file`][safeds.data.tabular.containers._table.Table.to_parquet_file] """ path = _normalize_and_check_file_path(path, ".json", [".json"]) path.parent.mkdir(parents=True, exist_ok=True) @@ -2668,8 +2665,8 @@ def to_parquet_file(self, path: str | Path) -> None: Related ------- - - [to_csv_file][safeds.data.tabular.containers._table.Table.to_csv_file] - - [to_json_file][safeds.data.tabular.containers._table.Table.to_json_file] + - [`to_csv_file`][safeds.data.tabular.containers._table.Table.to_csv_file] + - [`to_json_file`][safeds.data.tabular.containers._table.Table.to_json_file] """ path = _normalize_and_check_file_path(path, ".parquet", [".parquet"]) path.parent.mkdir(parents=True, exist_ok=True) diff --git a/src/safeds/data/tabular/containers/_temporal_cell.py b/src/safeds/data/tabular/containers/_temporal_cell.py deleted file mode 100644 index e4a3dca59..000000000 --- a/src/safeds/data/tabular/containers/_temporal_cell.py +++ /dev/null @@ -1,254 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from safeds.data.tabular.containers import Cell - - -class TemporalCell(ABC): - """ - Namespace for operations on temporal data. - - This class cannot be instantiated directly. It can only be accessed using the `dt` attribute of a cell. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.date_to_string("%Y/%m/%d")) - +------------+ - | example | - | --- | - | str | - +============+ - | 2022/01/09 | - +------------+ - """ - - @abstractmethod - def century(self) -> Cell[int]: - """ - Get the century of the underlying date(time) data. - - Returns - ------- - A cell containing the century as integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.century()) - +---------+ - | example | - | --- | - | i32 | - +=========+ - | 21 | - +---------+ - """ - - @abstractmethod - def weekday(self) -> Cell[int]: - """ - Get the weekday of the underlying date(time) data. - - Returns - ------- - A cell containing the weekday as integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.weekday()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 6 | - +---------+ - """ - - @abstractmethod - def week(self) -> Cell[int]: - """ - Get the week of the underlying date(time) data. - - Returns - ------- - A cell containing the week as integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 1)]) - >>> column.transform(lambda cell: cell.dt.week()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 52 | - +---------+ - """ - - @abstractmethod - def year(self) -> Cell[int]: - """ - Get the year of the underlying date(time) data. - - Returns - ------- - A cell containing the year as integer. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.year()) - +---------+ - | example | - | --- | - | i32 | - +=========+ - | 2022 | - +---------+ - """ - - @abstractmethod - def month(self) -> Cell[int]: - """ - Get the month of the underlying date(time) data. - - Returns - ------- - A cell containing the month as integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.month()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 1 | - +---------+ - """ - - @abstractmethod - def day(self) -> Cell[int]: - """ - Get the day of the underlying date(time) data. - - Returns - ------- - A cell containing the day as integer. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.day()) - +---------+ - | example | - | --- | - | i8 | - +=========+ - | 9 | - +---------+ - """ - - @abstractmethod - def datetime_to_string(self, format_string: str = "%Y/%m/%d %H:%M:%S") -> Cell[str]: - """ - Convert the date value in the cell to a string. - - Parameters - ---------- - format_string: - The format string it will be used to convert the data into the string. - - Returns - ------- - date: - The string value. - - Raises - ------ - ValueError - If the formatstring is invalid. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [ datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)]) - >>> column.transform(lambda cell: cell.dt.datetime_to_string()) - +---------------------+ - | example | - | --- | - | str | - +=====================+ - | 2022/01/09 23:29:01 | - +---------------------+ - """ - - @abstractmethod - def date_to_string(self, format_string: str = "%F") -> Cell[str]: - """ - Convert the date value in the cell to a string. - - Parameters - ---------- - format_string: - The format string it will be used to convert the data into the string. - - Returns - ------- - date: - The string value. - - - ValueError - If the formatstring is invalid. - - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> import datetime - >>> column = Column("example", [datetime.date(2022, 1, 9)]) - >>> column.transform(lambda cell: cell.dt.date_to_string()) - +------------+ - | example | - | --- | - | str | - +============+ - | 2022-01-09 | - +------------+ - """ - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - @abstractmethod - def _equals(self, other: object) -> bool: - """ - Check if this cell is equal to another object. - - This method is needed because the `__eq__` method is used for element-wise comparisons. 
- """ diff --git a/src/safeds/data/tabular/query/__init__.py b/src/safeds/data/tabular/query/__init__.py new file mode 100644 index 000000000..d3671f51b --- /dev/null +++ b/src/safeds/data/tabular/query/__init__.py @@ -0,0 +1,28 @@ +"""Classes that represent queries on the data.""" + +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._datetime_operations import DatetimeOperations + from ._duration_operations import DurationOperations + from ._math_operations import MathOperations + from ._string_operations import StringOperations + +apipkg.initpkg( + __name__, + { + "DatetimeOperations": "._datetime_operations:DatetimeOperations", + "DurationOperations": "._duration_operations:DurationOperations", + "MathOperations": "._math_operations:MathOperations", + "StringOperations": "._string_operations:StringOperations", + }, +) + +__all__ = [ + "DatetimeOperations", + "DurationOperations", + "MathOperations", + "StringOperations", +] diff --git a/src/safeds/data/tabular/query/_datetime_operations.py b/src/safeds/data/tabular/query/_datetime_operations.py new file mode 100644 index 000000000..7e9546b8f --- /dev/null +++ b/src/safeds/data/tabular/query/_datetime_operations.py @@ -0,0 +1,970 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from datetime import date as python_date + from datetime import time as python_time + + from safeds._typing import _ConvertibleToIntCell + from safeds.data.tabular.containers import Cell + + +class DatetimeOperations(ABC): + """ + Namespace for operations on datetimes, dates, and times. + + This class cannot be instantiated directly. It can only be accessed using the `dt` attribute of a cell. 
+ + Examples + -------- + >>> from datetime import date + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [date(2022, 1, 9), date(2024, 6, 12)]) + >>> column.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2022 | + | 2024 | + +------+ + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # Extract components + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def century(self) -> Cell[int | None]: + """ + Extract the century from a datetime or date. + + Note that since our calendar begins with year 1 the first century lasts from year 1 to year 100. Subsequent + centuries begin with years ending in "01" and end with years ending in "00". + + Returns + ------- + cell: + The century. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.century()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 20 | + | 20 | + | 21 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2001, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.century()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 20 | + | 20 | + | 21 | + | null | + +------+ + """ + + @abstractmethod + def date(self) -> Cell[python_date | None]: + """ + Extract the date from a datetime. + + Returns + ------- + cell: + The date. + + Examples + -------- + >>> from datetime import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column.transform(lambda cell: cell.dt.date()) + +------------+ + | a | + | --- | + | date | + +============+ + | 1999-12-31 | + | 2000-01-01 | + | null | + +------------+ + """ + + @abstractmethod + def day(self) -> Cell[int | None]: + """ + Extract the day from a datetime or date. + + Returns + ------- + cell: + The day. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.day()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 31 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.day()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 31 | + | 1 | + | null | + +------+ + """ + + @abstractmethod + def day_of_week(self) -> Cell[int | None]: + """ + Extract the day of the week from a datetime or date as defined by ISO 8601. + + The day of the week is a number between 1 (Monday) and 7 (Sunday). + + Returns + ------- + cell: + The day of the week. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1), datetime(2000, 1, 2), None]) + >>> column1.transform(lambda cell: cell.dt.day_of_week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ + + >>> column2 = Column("a", [date(2000, 1, 1), date(2000, 1, 2), None]) + >>> column2.transform(lambda cell: cell.dt.day_of_week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 6 | + | 7 | + | null | + +------+ + """ + + @abstractmethod + def day_of_year(self) -> Cell[int | None]: + """ + Extract the day of the year from a datetime or date. + + The day of the year is a number between 1 and 366. A 366th day only occurs in leap years. + + Returns + ------- + cell: + The day of the year. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 12, 31), None]) + >>> column1.transform(lambda cell: cell.dt.day_of_year()) + +------+ + | a | + | --- | + | i16 | + +======+ + | 365 | + | 1 | + | 366 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2000, 12, 31), None]) + >>> column2.transform(lambda cell: cell.dt.day_of_year()) + +------+ + | a | + | --- | + | i16 | + +======+ + | 365 | + | 1 | + | 366 | + | null | + +------+ + """ + + @abstractmethod + def hour(self) -> Cell[int | None]: + """ + Extract the hour from a datetime or time. + + Returns + ------- + cell: + The hour. + + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, hour=0), datetime(2000, 1, 1, hour=12), None]) + >>> column1.transform(lambda cell: cell.dt.hour()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 12 | + | null | + +------+ + + >>> column2 = Column("a", [time(hour=0), time(hour=12), None]) + >>> column2.transform(lambda cell: cell.dt.hour()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 12 | + | null | + +------+ + """ + + @abstractmethod + def microsecond(self) -> Cell[int | None]: + """ + Extract the microsecond from a datetime or time. + + Returns + ------- + cell: + The microsecond. 
+ + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, microsecond=0), datetime(2000, 1, 1, microsecond=500), None]) + >>> column1.transform(lambda cell: cell.dt.microsecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + + >>> column2 = Column("a", [time(microsecond=0), time(microsecond=500), None]) + >>> column2.transform(lambda cell: cell.dt.microsecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + """ + + @abstractmethod + def millennium(self) -> Cell[int | None]: + """ + Extract the millennium from a datetime or date. + + Note that since our calendar begins with year 1 the first millennium lasts from year 1 to year 1000. Subsequent + millennia begin with years ending in "001" and end with years ending in "000". + + Returns + ------- + cell: + The millennium. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2001, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.millennium()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2 | + | 2 | + | 3 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2001, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.millennium()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 2 | + | 2 | + | 3 | + | null | + +------+ + """ + + @abstractmethod + def millisecond(self) -> Cell[int | None]: + """ + Extract the millisecond from a datetime or time. + + Returns + ------- + cell: + The millisecond. 
+ + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, microsecond=0), datetime(2000, 1, 1, microsecond=500000), None]) + >>> column1.transform(lambda cell: cell.dt.millisecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + + >>> column2 = Column("a", [time(microsecond=0), time(microsecond=500000), None]) + >>> column2.transform(lambda cell: cell.dt.millisecond()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 0 | + | 500 | + | null | + +------+ + """ + + @abstractmethod + def minute(self) -> Cell[int | None]: + """ + Extract the minute from a datetime or time. + + Returns + ------- + cell: + The minute. + + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, minute=0), datetime(2000, 1, 1, minute=30), None]) + >>> column1.transform(lambda cell: cell.dt.minute()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + + >>> column2 = Column("a", [time(minute=0), time(minute=30), None]) + >>> column2.transform(lambda cell: cell.dt.minute()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + """ + + @abstractmethod + def month(self) -> Cell[int | None]: + """ + Extract the month from a datetime or date. + + Returns + ------- + cell: + The month. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.month()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 12 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.month()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 12 | + | 1 | + | null | + +------+ + """ + + @abstractmethod + def quarter(self) -> Cell[int | None]: + """ + Extract the quarter from a datetime or date. + + The quarter is a number between 1 and 4: + + - 1: January to March + - 2: April to June + - 3: July to September + - 4: October to December + + Returns + ------- + cell: + The quarter. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), datetime(2000, 4, 1), None]) + >>> column1.transform(lambda cell: cell.dt.quarter()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 4 | + | 1 | + | 2 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), date(2000, 4, 1), None]) + >>> column2.transform(lambda cell: cell.dt.quarter()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 4 | + | 1 | + | 2 | + | null | + +------+ + """ + + @abstractmethod + def second(self) -> Cell[int | None]: + """ + Extract the second from a datetime or time. + + Returns + ------- + cell: + The second. 
+ + Examples + -------- + >>> from datetime import datetime, time + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1, second=0), datetime(2000, 1, 1, second=30), None]) + >>> column1.transform(lambda cell: cell.dt.second()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + + >>> column2 = Column("a", [time(second=0), time(second=30), None]) + >>> column2.transform(lambda cell: cell.dt.second()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 0 | + | 30 | + | null | + +------+ + """ + + @abstractmethod + def time(self) -> Cell[python_time | None]: + """ + Extract the time from a datetime. + + Returns + ------- + cell: + The time. + + Examples + -------- + >>> from datetime import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column.transform(lambda cell: cell.dt.time()) + +----------+ + | a | + | --- | + | time | + +==========+ + | 00:00:00 | + | 12:30:00 | + | null | + +----------+ + """ + + @abstractmethod + def week(self) -> Cell[int | None]: + """ + Extract the ISO 8601 week number from a datetime or date. + + The week is a number between 1 and 53. The first week of a year is the week that contains the first Thursday of + the year. The last week of a year is the week that contains the last Thursday of the year. In other words, a + week is associated with a year if it contains the majority of its days. + + Returns + ------- + cell: + The week. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 2), datetime(2001, 12, 31), None]) + >>> column1.transform(lambda cell: cell.dt.week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 52 | + | 52 | + | 1 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 2), date(2001, 12, 31), None]) + >>> column2.transform(lambda cell: cell.dt.week()) + +------+ + | a | + | --- | + | i8 | + +======+ + | 52 | + | 52 | + | 1 | + | null | + +------+ + """ + + @abstractmethod + def year(self) -> Cell[int | None]: + """ + Extract the year from a datetime or date. + + Returns + ------- + cell: + The year. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 1999 | + | 2000 | + | null | + +------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.year()) + +------+ + | a | + | --- | + | i32 | + +======+ + | 1999 | + | 2000 | + | null | + +------+ + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Other operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def is_in_leap_year(self) -> Cell[bool | None]: + """ + Check whether a datetime or date is in a leap year. + + Returns + ------- + cell: + Whether the year is a leap year. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1900, 1, 1), datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.is_in_leap_year()) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ + + >>> column2 = Column("a", [date(1900, 1, 1), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.is_in_leap_year()) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | null | + +-------+ + """ + + @abstractmethod + def replace( + self, + *, + year: _ConvertibleToIntCell = None, + month: _ConvertibleToIntCell = None, + day: _ConvertibleToIntCell = None, + hour: _ConvertibleToIntCell = None, + minute: _ConvertibleToIntCell = None, + second: _ConvertibleToIntCell = None, + microsecond: _ConvertibleToIntCell = None, + ) -> Cell: + """ + Replace components of a datetime or date. + + If a component is not provided, it is not changed. Components that are not applicable to the object are ignored, + e.g. setting the hour of a date. Invalid results are converted to missing values (`None`). + + Parameters + ---------- + year: + The new year. + month: + The new month. Must be between 1 and 12. + day: + The new day. Must be between 1 and 31. + hour: + The new hour. Must be between 0 and 23. + minute: + The new minute. Must be between 0 and 59. + second: + The new second. Must be between 0 and 59. + microsecond: + The new microsecond. Must be between 0 and 999999. + + Returns + ------- + cell: + The new datetime or date. 
+ + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(2000, 1, 1), None]) + >>> column1.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) + +---------------------+ + | a | + | --- | + | datetime[μs] | + +=====================+ + | 2000-02-02 02:00:00 | + | null | + +---------------------+ + + >>> column2 = Column("a", [date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.replace(month=2, day=2, hour=2)) + +------------+ + | a | + | --- | + | date | + +============+ + | 2000-02-02 | + | null | + +------------+ + """ + + @abstractmethod + def to_string(self, *, format: str = "iso") -> Cell[str | None]: + r""" + Convert a datetime, date, or time to a string. + + The `format` parameter controls the presentation. It can be `"iso"` to target ISO 8601 or a custom string. The + custom string can contain fixed specifiers (see below), which are replaced with the corresponding values. The + specifiers are case-sensitive and always enclosed in curly braces. Other text is included in the output + verbatim. To include a literal opening curly brace, use `\{`, and to include a literal backslash, use `\\`. + + The following specifiers for _date components_ are available for **datetime** and **date**: + + - `{Y}`, `{_Y}`, `{^Y}`: Year (zero-padded to four digits, space-padded to four digits, no padding). + - `{Y99}`, `{_Y99}`, `{^Y99}`: Year modulo 100 (zero-padded to two digits, space-padded to two digits, no + padding). + - `{M}`, `{_M}`, `{^M}`: Month (zero-padded to two digits, space-padded to two digits, no padding). + - `{M-full}`: Full name of the month (e.g. "January"). + - `{M-short}`: Abbreviated name of the month with three letters (e.g. "Jan"). + - `{W}`, `{_W}`, `{^W}`: Week number as defined by ISO 8601 (zero-padded to two digits, space-padded to two + digits, no padding). 
+ - `{D}`, `{_D}`, `{^D}`: Day of the month (zero-padded to two digits, space-padded to two digits, no padding). + - `{DOW}`: Day of the week as defined by ISO 8601 (1 = Monday, 7 = Sunday). + - `{DOW-full}`: Full name of the day of the week (e.g. "Monday"). + - `{DOW-short}`: Abbreviated name of the day of the week with three letters (e.g. "Mon"). + - `{DOY}`, `{_DOY}`, `{^DOY}`: Day of the year, ranging from 1 to 366 (zero-padded to three digits, space-padded + to three digits, no padding). + + The following specifiers for _time components_ are available for **datetime** and **time**: + + - `{h}`, `{_h}`, `{^h}`: Hour (zero-padded to two digits, space-padded to two digits, no padding). + - `{h12}`, `{_h12}`, `{^h12}`: Hour in 12-hour format (zero-padded to two digits, space-padded to two digits, no + padding). + - `{m}`, `{_m}`, `{^m}`: Minute (zero-padded to two digits, space-padded to two digits, no padding). + - `{s}`, `{_s}`, `{^s}`: Second (zero-padded to two digits, space-padded to two digits, no padding). + - `{.f}`: Fractional seconds with a leading decimal point. + - `{ms}`: Millisecond (zero-padded to three digits). + - `{us}`: Microsecond (zero-padded to six digits). + - `{ns}`: Nanosecond (zero-padded to nine digits). + - `{AM/PM}`: AM or PM (uppercase). + - `{am/pm}`: am or pm (lowercase). + + The following specifiers are available for **datetime** only: + + - `{z}`: Offset of the timezone from UTC without a colon (e.g. "+0000"). + - `{:z}`: Offset of the timezone from UTC with a colon (e.g. "+00:00"). + - `{u}`: The UNIX timestamp in seconds. + + The specifiers follow certain conventions: + + - Generally, date components use uppercase letters and time components use lowercase letters. + - If a component may be formatted in multiple ways, we use shorter specifiers for ISO 8601. Specifiers for + other formats have a prefix (same value with different padding, see below) or suffix (other differences). 
+ - By default, values are zero-padded, where applicable. + - A leading underscore (`_`) means the value is space-padded. + - A leading caret (`^`) means the value has no padding (think of the caret in regular expressions). + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The string representation. + + Raises + ------ + ValueError + If the format is invalid. + + Examples + -------- + >>> from datetime import date, datetime + >>> from safeds.data.tabular.containers import Column + >>> column1 = Column("a", [datetime(1999, 12, 31), datetime(2000, 1, 1, 12, 30, 0), None]) + >>> column1.transform(lambda cell: cell.dt.to_string()) + +----------------------------+ + | a | + | --- | + | str | + +============================+ + | 1999-12-31T00:00:00.000000 | + | 2000-01-01T12:30:00.000000 | + | null | + +----------------------------+ + + >>> column1.transform(lambda cell: cell.dt.to_string( + ... format="{DOW-short} {D}-{M-short}-{Y} {h12}:{m}:{s} {AM/PM}" + ... )) + +-----------------------------+ + | a | + | --- | + | str | + +=============================+ + | Fri 31-Dec-1999 12:00:00 AM | + | Sat 01-Jan-2000 12:30:00 PM | + | null | + +-----------------------------+ + + >>> column2 = Column("a", [date(1999, 12, 31), date(2000, 1, 1), None]) + >>> column2.transform(lambda cell: cell.dt.to_string()) + +------------+ + | a | + | --- | + | str | + +============+ + | 1999-12-31 | + | 2000-01-01 | + | null | + +------------+ + + >>> column2.transform(lambda cell: cell.dt.to_string( + ... format="{M}/{D}/{Y}" + ... )) + +------------+ + | a | + | --- | + | str | + +============+ + | 12/31/1999 | + | 01/01/2000 | + | null | + +------------+ + """ + + @abstractmethod + def unix_timestamp(self, *, unit: Literal["s", "ms", "us"] = "s") -> Cell[int | None]: + """ + Get the Unix timestamp from a datetime. + + A Unix timestamp is the elapsed time since 00:00:00 UTC on 1 January 1970. 
By default, this method returns the + value in seconds, but that can be changed with the `unit` parameter. + + Parameters + ---------- + unit: + The unit of the timestamp. Can be "s" (seconds), "ms" (milliseconds), or "us" (microseconds). + + Returns + ------- + cell: + The Unix timestamp. + + Examples + -------- + >>> from datetime import datetime + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [datetime(1970, 1, 1), datetime(1970, 1, 2), None]) + >>> column.transform(lambda cell: cell.dt.unix_timestamp()) + +-------+ + | a | + | --- | + | i64 | + +=======+ + | 0 | + | 86400 | + | null | + +-------+ + + >>> column.transform(lambda cell: cell.dt.unix_timestamp(unit="ms")) + +----------+ + | a | + | --- | + | i64 | + +==========+ + | 0 | + | 86400000 | + | null | + +----------+ + """ diff --git a/src/safeds/data/tabular/query/_duration_operations.py b/src/safeds/data/tabular/query/_duration_operations.py new file mode 100644 index 000000000..30d032003 --- /dev/null +++ b/src/safeds/data/tabular/query/_duration_operations.py @@ -0,0 +1,290 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Literal + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Cell + + +# TODO: Examples with None + + +class DurationOperations(ABC): + """ + Namespace for operations on durations. + + This class cannot be instantiated directly. It can only be accessed using the `dur` attribute of a cell. + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... 
+ + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # Duration operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def abs(self) -> Cell[None]: + """ + Get the absolute value of the duration. + + Returns + ------- + cell: + The absolute value. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=-1), timedelta(days=1), None]) + >>> column.transform(lambda cell: cell.dur.abs()) + +--------------+ + | a | + | --- | + | duration[μs] | + +==============+ + | 1d | + | 1d | + | null | + +--------------+ + """ + + @abstractmethod + def full_weeks(self) -> Cell[int | None]: + """ + Get the number of full weeks in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full weeks. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(days=8), timedelta(days=6), None]) + >>> column.transform(lambda cell: cell.dur.full_weeks()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_days(self) -> Cell[int | None]: + """ + Get the number of full days in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full days. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(hours=25), timedelta(hours=23), None]) + >>> column.transform(lambda cell: cell.dur.full_days()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_hours(self) -> Cell[int | None]: + """ + Get the number of full hours in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full hours. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(minutes=61), timedelta(minutes=59), None]) + >>> column.transform(lambda cell: cell.dur.full_hours()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_minutes(self) -> Cell[int | None]: + """ + Get the number of full minutes in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full minutes. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(seconds=61), timedelta(seconds=59), None]) + >>> column.transform(lambda cell: cell.dur.full_minutes()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_seconds(self) -> Cell[int | None]: + """ + Get the number of full seconds in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full seconds. 
+ + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(milliseconds=1001), timedelta(milliseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_seconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_milliseconds(self) -> Cell[int | None]: + """ + Get the number of full milliseconds in the duration. The result is rounded toward zero. + + Returns + ------- + cell: + The number of full milliseconds. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(microseconds=1001), timedelta(microseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_milliseconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 0 | + | null | + +------+ + """ + + @abstractmethod + def full_microseconds(self) -> Cell[int | None]: + """ + Get the number of full microseconds in the duration. The result is rounded toward zero. + + Since durations only have microsecond resolution at the moment, the rounding has no effect. This may change in + the future. + + Returns + ------- + cell: + The number of full microseconds. + + Examples + -------- + >>> from datetime import timedelta + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [timedelta(microseconds=1001), timedelta(microseconds=999), None]) + >>> column.transform(lambda cell: cell.dur.full_microseconds()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1001 | + | 999 | + | null | + +------+ + """ + + @abstractmethod + def to_string( + self, + *, + format: Literal["iso", "pretty"] = "iso", + ) -> Cell[str | None]: + """ + Convert the duration to a string. + + The following formats are supported: + + - `"iso"`: The duration is represented in the ISO 8601 format. This is the default. 
+ - `"pretty"`: The duration is represented in a human-readable format. + + !!! warning "API Stability" + + Do not rely on the exact output of the `"pretty"` format. In future versions, we may change it without prior + notice. + + Parameters + ---------- + format: + The format to use. + + Returns + ------- + cell: + The string representation. + """ diff --git a/src/safeds/data/tabular/query/_lazy_datetime_operations.py b/src/safeds/data/tabular/query/_lazy_datetime_operations.py new file mode 100644 index 000000000..298c8fdb4 --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_datetime_operations.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from safeds._utils import _structural_hash +from safeds._validation import _convert_and_check_datetime_format +from safeds.data.tabular.containers._lazy_cell import _LazyCell + +from ._datetime_operations import DatetimeOperations + +if TYPE_CHECKING: + import datetime as python_datetime + + import polars as pl + + from safeds._typing import _ConvertibleToIntCell + from safeds.data.tabular.containers._cell import Cell + + +class _LazyDatetimeOperations(DatetimeOperations): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyDatetimeOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyDatetimeOperations({self._expression})" + + def __sizeof__(self) -> int: + return 
self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).dt" + + # ------------------------------------------------------------------------------------------------------------------ + # Extract components + # ------------------------------------------------------------------------------------------------------------------ + + def century(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.century()) + + def date(self) -> Cell[python_datetime.date | None]: + return _LazyCell(self._expression.dt.date()) + + def day(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.day()) + + def day_of_year(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.ordinal_day()) + + def hour(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.hour()) + + def microsecond(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.microsecond()) + + def millennium(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.millennium()) + + def millisecond(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.millisecond()) + + def minute(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.minute()) + + def month(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.month()) + + def quarter(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.quarter()) + + def second(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.second()) + + def time(self) -> Cell[python_datetime.time | None]: + return _LazyCell(self._expression.dt.time()) + + def week(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.week()) + + def day_of_week(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.weekday()) + + def year(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.year()) + + # ------------------------------------------------------------------------------------------------------------------ + # Other 
operations + # ------------------------------------------------------------------------------------------------------------------ + + def is_in_leap_year(self) -> Cell[bool | None]: + return _LazyCell(self._expression.dt.is_leap_year()) + + def replace( + self, + *, + year: _ConvertibleToIntCell = None, + month: _ConvertibleToIntCell = None, + day: _ConvertibleToIntCell = None, + hour: _ConvertibleToIntCell = None, + minute: _ConvertibleToIntCell = None, + second: _ConvertibleToIntCell = None, + microsecond: _ConvertibleToIntCell = None, + ) -> Cell: + return _LazyCell( + self._expression.dt.replace( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + ) + + def to_string(self, *, format: str = "iso") -> Cell[str | None]: + if format == "iso": + format = "iso:strict" # noqa: A001 + else: + format = _convert_and_check_datetime_format(format, type_="datetime", used_for_parsing=False) # noqa: A001 + + return _LazyCell(self._expression.dt.to_string(format=format)) + + def unix_timestamp(self, *, unit: Literal["s", "ms", "us"] = "s") -> Cell[int | None]: + return _LazyCell(self._expression.dt.epoch(time_unit=unit)) diff --git a/src/safeds/data/tabular/query/_lazy_duration_operations.py b/src/safeds/data/tabular/query/_lazy_duration_operations.py new file mode 100644 index 000000000..62f10b66f --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_duration_operations.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +from safeds._utils import _structural_hash +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + +if TYPE_CHECKING: + import polars as pl + + from safeds.data.tabular.containers import Cell + + +class _LazyDurationOperations(DurationOperations): + # ------------------------------------------------------------------------------------------------------------------ + # 
Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyDurationOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyDurationOperations({self._expression})" + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).dur" + + # ------------------------------------------------------------------------------------------------------------------ + # Duration operations + # ------------------------------------------------------------------------------------------------------------------ + + def abs(self) -> Cell[None]: + return _LazyCell(self._expression.abs()) + + def full_weeks(self) -> Cell[int | None]: + import polars as pl + + # We must round towards zero + return _LazyCell((self._expression.dt.total_days() / 7).cast(pl.Int64())) + + def full_days(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_days()) + + def full_hours(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_hours()) + + def full_minutes(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_minutes()) + + def full_seconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_seconds()) + + def full_milliseconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_milliseconds()) + + def full_microseconds(self) -> Cell[int | None]: + return _LazyCell(self._expression.dt.total_microseconds()) + + def to_string( + self, + *, + format: Literal["iso", "pretty"] = "iso", + ) -> Cell[str | 
None]: + polars_format = "iso" if format == "iso" else "polars" + + return _LazyCell(self._expression.dt.to_string(polars_format)) diff --git a/src/safeds/data/tabular/query/_lazy_math_operations.py b/src/safeds/data/tabular/query/_lazy_math_operations.py new file mode 100644 index 000000000..b141fe143 --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_math_operations.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query._math_operations import MathOperations + +if TYPE_CHECKING: + import polars as pl + + from safeds.data.tabular.containers import Cell + + +class _LazyMathOperations(MathOperations): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyMathOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyMathOperations({self._expression})" + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).math" + + # ------------------------------------------------------------------------------------------------------------------ + # Math operations + # ------------------------------------------------------------------------------------------------------------------ + + def abs(self) -> Cell: + return _LazyCell(self._expression.__abs__()) + 
+ def ceil(self) -> Cell: + return _LazyCell(self._expression.ceil()) + + def floor(self) -> Cell: + return _LazyCell(self._expression.floor()) diff --git a/src/safeds/data/tabular/query/_lazy_string_operations.py b/src/safeds/data/tabular/query/_lazy_string_operations.py new file mode 100644 index 000000000..94cc4ac25 --- /dev/null +++ b/src/safeds/data/tabular/query/_lazy_string_operations.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format +from safeds.data.tabular.containers._lazy_cell import _LazyCell + +from ._string_operations import StringOperations + +if TYPE_CHECKING: + import datetime + + import polars as pl + + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell + from safeds.data.tabular.containers._cell import Cell + + +class _LazyStringOperations(StringOperations): + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, expression: pl.Expr) -> None: + self._expression: pl.Expr = expression + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _LazyStringOperations): + return NotImplemented + if self is other: + return True + return self._expression.meta.eq(other._expression) + + def __hash__(self) -> int: + return _structural_hash(self._expression.meta.serialize()) + + def __repr__(self) -> str: + return f"_LazyStringOperations({self._expression})" + + def __sizeof__(self) -> int: + return self._expression.__sizeof__() + + def __str__(self) -> str: + return f"({self._expression}).str" + + # ------------------------------------------------------------------------------------------------------------------ + # 
String operations + # ------------------------------------------------------------------------------------------------------------------ + + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.contains(substring, literal=True)) + + def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]: + if optimize_for_ascii: + return _LazyCell(self._expression.str.len_bytes()) + else: + return _LazyCell(self._expression.str.len_chars()) + + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.ends_with(suffix)) + + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: + return _LazyCell(self._expression.str.find(substring, literal=True)) + + def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: + return _LazyCell(self._expression.str.replace_all(old, new, literal=True)) + + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: + return _LazyCell(self._expression.str.starts_with(prefix)) + + def substring( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: + if isinstance(length, int): + _check_bounds("length", length, lower_bound=_ClosedBound(0)) + + return _LazyCell(self._expression.str.slice(start, length)) + + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: + if format == "iso": + format = "%F" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, type_="date", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_date(format=format, strict=False)) + + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: + if format == "iso": + format = "%+" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, 
type_="datetime", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_datetime(format=format, strict=False)) + + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: + return _LazyCell(self._expression.str.to_integer(base=base, strict=False)) + + def to_lowercase(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.to_lowercase()) + + def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]: + if format == "iso": + format = "%T" # noqa: A001 + elif format is not None: + format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001 + + return _LazyCell(self._expression.str.to_time(format=format, strict=False)) + + def to_uppercase(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.to_uppercase()) + + def trim(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars()) + + def trim_end(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars_end()) + + def trim_start(self) -> Cell[str | None]: + return _LazyCell(self._expression.str.strip_chars_start()) diff --git a/src/safeds/data/tabular/query/_math_operations.py b/src/safeds/data/tabular/query/_math_operations.py new file mode 100644 index 000000000..7f7b208a7 --- /dev/null +++ b/src/safeds/data/tabular/query/_math_operations.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Cell + + +class MathOperations(ABC): + """ + Namespace for mathematical operations. + + This class cannot be instantiated directly. It can only be accessed using the `math` attribute of a cell. 
+ """ + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... + + @abstractmethod + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # Math operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def abs(self) -> Cell: + """ + Get the absolute value. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1, -2, None]) + >>> column.transform(lambda cell: cell.math.abs()) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | null | + +------+ + """ + + @abstractmethod + def ceil(self) -> Cell: + """ + Round up to the nearest integer. + + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1.1, 3.0, None]) + >>> column.transform(lambda cell: cell.math.ceil()) + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 2.00000 | + | 3.00000 | + | null | + +---------+ + """ + + @abstractmethod + def floor(self) -> Cell: + """ + Round down to the nearest integer. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> column = Column("a", [1.1, 3.0, None]) + >>> column.transform(lambda cell: cell.math.floor()) + +---------+ + | a | + | --- | + | f64 | + +=========+ + | 1.00000 | + | 3.00000 | + | null | + +---------+ + """ diff --git a/src/safeds/data/tabular/containers/_string_cell.py b/src/safeds/data/tabular/query/_string_operations.py similarity index 57% rename from src/safeds/data/tabular/containers/_string_cell.py rename to src/safeds/data/tabular/query/_string_operations.py index e47a41444..6b99afe59 100644 --- a/src/safeds/data/tabular/containers/_string_cell.py +++ b/src/safeds/data/tabular/query/_string_operations.py @@ -6,10 +6,17 @@ if TYPE_CHECKING: import datetime + from safeds._typing import _ConvertibleToIntCell, _ConvertibleToStringCell from safeds.data.tabular.containers import Cell +# TODO: examples with None +# TODO: add more methods +# - reverse +# - to_time +# - ... -class StringCell(ABC): + +class StringOperations(ABC): """ Namespace for operations on strings. @@ -18,21 +25,44 @@ class StringCell(ABC): Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd"]) >>> column.transform(lambda cell: cell.str.to_uppercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | AB | - | BC | - | CD | - +---------+ + +-----+ + | a | + | --- | + | str | + +=====+ + | AB | + | BC | + | CD | + +-----+ """ + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def __eq__(self, other: object) -> bool: ... + + @abstractmethod + def __hash__(self) -> int: ... + + @abstractmethod + def __repr__(self) -> str: ... 
+ @abstractmethod - def contains(self, substring: str) -> Cell[bool]: + def __sizeof__(self) -> int: ... + + @abstractmethod + def __str__(self) -> str: ... + + # ------------------------------------------------------------------------------------------------------------------ + # String operations + # ------------------------------------------------------------------------------------------------------------------ + + @abstractmethod + def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell contains the substring. @@ -49,13 +79,22 @@ def contains(self, substring: str) -> Cell[bool]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.contains("b")) - 2 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.contains("b")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | true | + | false | + | null | + +-------+ """ @abstractmethod - def ends_with(self, suffix: str) -> Cell[bool]: + def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell ends with the suffix. 
@@ -72,13 +111,22 @@ def ends_with(self, suffix: str) -> Cell[bool]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.ends_with("c")) - 1 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.ends_with("c")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | false | + | true | + | false | + | null | + +-------+ """ @abstractmethod - def index_of(self, substring: str) -> Cell[int | None]: + def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]: """ Get the index of the first occurrence of the substring in the string value in the cell. @@ -95,21 +143,22 @@ def index_of(self, substring: str) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.index_of("b")) - +---------+ - | example | - | --- | - | u32 | - +=========+ - | 1 | - | 0 | - | null | - +---------+ + +------+ + | a | + | --- | + | u32 | + +======+ + | 1 | + | 0 | + | null | + | null | + +------+ """ @abstractmethod - def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: + def length(self, *, optimize_for_ascii: bool = False) -> Cell[int | None]: """ Get the number of characters of the string value in the cell. 
@@ -127,21 +176,22 @@ def length(self, *, optimize_for_ascii: bool = False) -> Cell[int]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", "a", "abc"]) + >>> column = Column("a", ["", "a", "abc", None]) >>> column.transform(lambda cell: cell.str.length()) - +---------+ - | example | - | --- | - | u32 | - +=========+ - | 0 | - | 1 | - | 3 | - +---------+ + +------+ + | a | + | --- | + | u32 | + +======+ + | 0 | + | 1 | + | 3 | + | null | + +------+ """ @abstractmethod - def replace(self, old: str, new: str) -> Cell[str]: + def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]: """ Replace occurrences of the old substring with the new substring in the string value in the cell. @@ -160,21 +210,22 @@ def replace(self, old: str, new: str) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.replace("b", "z")) - +---------+ - | example | - | --- | - | str | - +=========+ - | az | - | zc | - | cd | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | az | + | zc | + | cd | + | null | + +------+ """ @abstractmethod - def starts_with(self, prefix: str) -> Cell[bool]: + def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]: """ Check if the string value in the cell starts with the prefix. 
@@ -191,13 +242,27 @@ def starts_with(self, prefix: str) -> Cell[bool]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) - >>> column.count_if(lambda cell: cell.str.starts_with("a")) - 1 + >>> column = Column("a", ["ab", "bc", "cd", None]) + >>> column.transform(lambda cell: cell.str.starts_with("a")) + +-------+ + | a | + | --- | + | bool | + +=======+ + | true | + | false | + | false | + | null | + +-------+ """ @abstractmethod - def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: + def substring( + self, + *, + start: _ConvertibleToIntCell = 0, + length: _ConvertibleToIntCell = None, + ) -> Cell[str | None]: """ Get a substring of the string value in the cell. @@ -222,23 +287,25 @@ def substring(self, start: int = 0, length: int | None = None) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["abc", "def", "ghi"]) - >>> column.transform(lambda cell: cell.str.substring(1, 2)) - +---------+ - | example | - | --- | - | str | - +=========+ - | bc | - | ef | - | hi | - +---------+ - """ - + >>> column = Column("a", ["abc", "def", "ghi", None]) + >>> column.transform(lambda cell: cell.str.substring(start=1, length=2)) + +------+ + | a | + | --- | + | str | + +======+ + | bc | + | ef | + | hi | + | null | + +------+ + """ + + # TODO: add format parameter + document @abstractmethod - def to_date(self) -> Cell[datetime.date | None]: + def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]: """ - Convert the string value in the cell to a date. Requires the string to be in the ISO 8601 format. + Convert the string value in the cell to a date. 
Returns ------- @@ -248,23 +315,25 @@ def to_date(self) -> Cell[datetime.date | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["2021-01-01", "2021-02-01", "abc"]) + >>> column = Column("a", ["2021-01-01", "2021-02-01", "abc", None]) >>> column.transform(lambda cell: cell.str.to_date()) +------------+ - | example | + | a | | --- | | date | +============+ | 2021-01-01 | | 2021-02-01 | | null | + | null | +------------+ """ + # TODO: add format parameter + document @abstractmethod - def to_datetime(self) -> Cell[datetime.datetime | None]: + def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime | None]: """ - Convert the string value in the cell to a datetime. Requires the string to be in the ISO 8601 format. + Convert the string value in the cell to a datetime. Returns ------- @@ -274,48 +343,24 @@ def to_datetime(self) -> Cell[datetime.datetime | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["2021-01-01T00:00:00z", "2021-02-01T00:00:00z", "abc"]) + >>> column = Column("a", ["2021-01-01T00:00:00Z", "2021-02-01T00:00:00Z", "abc", None]) >>> column.transform(lambda cell: cell.str.to_datetime()) +-------------------------+ - | example | + | a | | --- | | datetime[μs, UTC] | +=========================+ | 2021-01-01 00:00:00 UTC | | 2021-02-01 00:00:00 UTC | | null | + | null | +-------------------------+ """ - @abstractmethod - def to_float(self) -> Cell[float | None]: - """ - Convert the string value in the cell to a float. - - Returns - ------- - float: - The float value. If the string cannot be converted to a float, None is returned. 
- - Examples - -------- - >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["1", "3.4", "5.6", "abc"]) - >>> column.transform(lambda cell: cell.str.to_float()) - +---------+ - | example | - | --- | - | f64 | - +=========+ - | 1.00000 | - | 3.40000 | - | 5.60000 | - | null | - +---------+ - """ + # TODO: add to_time @abstractmethod - def to_int(self, *, base: int = 10) -> Cell[int | None]: + def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]: """ Convert the string value in the cell to an integer. @@ -332,35 +377,37 @@ def to_int(self, *, base: int = 10) -> Cell[int | None]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column1 = Column("example", ["1", "2", "3", "abc"]) + >>> column1 = Column("a", ["1", "2", "3", "abc", None]) >>> column1.transform(lambda cell: cell.str.to_int()) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 1 | - | 2 | - | 3 | - | null | - +---------+ - - >>> column2 = Column("example", ["1", "10", "11", "abc"]) + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | 3 | + | null | + | null | + +------+ + + >>> column2 = Column("a", ["1", "10", "11", "abc", None]) >>> column2.transform(lambda cell: cell.str.to_int(base=2)) - +---------+ - | example | - | --- | - | i64 | - +=========+ - | 1 | - | 2 | - | 3 | - | null | - +---------+ + +------+ + | a | + | --- | + | i64 | + +======+ + | 1 | + | 2 | + | 3 | + | null | + | null | + +------+ """ @abstractmethod - def to_lowercase(self) -> Cell[str]: + def to_lowercase(self) -> Cell[str | None]: """ Convert the string value in the cell to lowercase. 
@@ -372,21 +419,22 @@ def to_lowercase(self) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["AB", "BC", "CD"]) + >>> column = Column("a", ["AB", "BC", "CD", None]) >>> column.transform(lambda cell: cell.str.to_lowercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | ab | - | bc | - | cd | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | ab | + | bc | + | cd | + | null | + +------+ """ @abstractmethod - def to_uppercase(self) -> Cell[str]: + def to_uppercase(self) -> Cell[str | None]: """ Convert the string value in the cell to uppercase. @@ -398,21 +446,22 @@ def to_uppercase(self) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["ab", "bc", "cd"]) + >>> column = Column("a", ["ab", "bc", "cd", None]) >>> column.transform(lambda cell: cell.str.to_uppercase()) - +---------+ - | example | - | --- | - | str | - +=========+ - | AB | - | BC | - | CD | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | AB | + | BC | + | CD | + | null | + +------+ """ @abstractmethod - def trim(self) -> Cell[str]: + def trim(self) -> Cell[str | None]: """ Remove whitespace from the start and end of the string value in the cell. 
@@ -424,22 +473,23 @@ def trim(self) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ @abstractmethod - def trim_end(self) -> Cell[str]: + def trim_end(self) -> Cell[str | None]: """ Remove whitespace from the end of the string value in the cell. @@ -451,22 +501,23 @@ def trim_end(self) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim_end()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ @abstractmethod - def trim_start(self) -> Cell[str]: + def trim_start(self) -> Cell[str | None]: """ Remove whitespace from the start of the string value in the cell. 
@@ -478,28 +529,17 @@ def trim_start(self) -> Cell[str]: Examples -------- >>> from safeds.data.tabular.containers import Column - >>> column = Column("example", ["", " abc", "abc ", " abc "]) + >>> column = Column("a", ["", " abc", "abc ", " abc ", None]) >>> column.transform(lambda cell: cell.str.trim_start()) - +---------+ - | example | - | --- | - | str | - +=========+ - | | - | abc | - | abc | - | abc | - +---------+ - """ - - # ------------------------------------------------------------------------------------------------------------------ - # Internal - # ------------------------------------------------------------------------------------------------------------------ - - @abstractmethod - def _equals(self, other: object) -> bool: - """ - Check if this cell is equal to another object. - - This method is needed because the `__eq__` method is used for element-wise comparisons. + +------+ + | a | + | --- | + | str | + +======+ + | | + | abc | + | abc | + | abc | + | null | + +------+ """ diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 54def0379..005d77a44 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -19,9 +19,9 @@ class RangeScaler(InvertibleTableTransformer): Parameters ---------- - min_: + min: The minimum of the new range after the transformation - max_: + max: The maximum of the new range after the transformation selector: The list of columns used to fit the transformer. If `None`, all numeric columns are used. 
@@ -36,15 +36,15 @@ class RangeScaler(InvertibleTableTransformer): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, *, selector: str | list[str] | None = None, min_: float = 0.0, max_: float = 1.0) -> None: + def __init__(self, *, selector: str | list[str] | None = None, min: float = 0.0, max: float = 1.0) -> None: super().__init__(selector) - if min_ >= max_: + if min >= max: raise ValueError('Parameter "max_" must be greater than parameter "min_".') # Parameters - self._min: float = min_ - self._max: float = max_ + self._min: float = min + self._max: float = max # Internal state self._data_min: pl.DataFrame | None = None @@ -121,7 +121,7 @@ def fit(self, table: Table) -> RangeScaler: _data_max = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).max()) # Create a copy with the learned transformation - result = RangeScaler(min_=self._min, max_=self._max, selector=column_names) + result = RangeScaler(min=self._min, max=self._max, selector=column_names) result._data_min = _data_min result._data_max = _data_max diff --git a/src/safeds/data/tabular/typing/_column_type.py b/src/safeds/data/tabular/typing/_column_type.py index 532a3448e..7409e6af7 100644 --- a/src/safeds/data/tabular/typing/_column_type.py +++ b/src/safeds/data/tabular/typing/_column_type.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +from safeds._validation import _check_time_zone + if TYPE_CHECKING: import polars as pl @@ -126,13 +128,24 @@ def date() -> ColumnType: return _PolarsColumnType(pl.Date()) @staticmethod - def datetime() -> ColumnType: - """Create a `datetime` column type.""" + def datetime(*, time_zone: str | None = None) -> ColumnType: + """ + Create a `datetime` column type. + + Parameters + ---------- + time_zone: + The time zone. If None, values are assumed to be in local time. 
This is different from setting the time zone + to `"UTC"`. Any TZ identifier defined in the + [tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is valid. + """ import polars as pl from ._polars_column_type import _PolarsColumnType # circular import - return _PolarsColumnType(pl.Datetime()) + _check_time_zone(time_zone) + + return _PolarsColumnType(pl.Datetime(time_zone=time_zone)) @staticmethod def duration() -> ColumnType: diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 2f7397f9f..5846c28f9 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -84,7 +84,7 @@ def assert_cell_operation_works( The type of the column if the value is `None`. """ type_ = type_if_none if value is None else None - column = Column("A", [value], type_=type_) + column = Column("a", [value], type=type_) transformed_column = column.transform(transformer) actual = transformed_column[0] assert actual == expected, f"Expected {expected}, but got {actual}." 
diff --git a/tests/safeds/_utils/test_get_similar_strings.py b/tests/safeds/_utils/test_get_similar_strings.py new file mode 100644 index 000000000..3acd29b09 --- /dev/null +++ b/tests/safeds/_utils/test_get_similar_strings.py @@ -0,0 +1,38 @@ +import pytest + +from safeds._utils import _get_similar_strings + + +@pytest.mark.parametrize( + ("string", "valid_strings", "expected"), + [ + ( + "column1", + [], + [], + ), + ( + "column1", + ["column1", "column2"], + ["column1"], + ), + ( + "dissimilar", + ["column1", "column2", "column3"], + [], + ), + ( + "cilumn1", + ["column1", "x", "y"], + ["column1"], + ), + ( + "cilumn1", + ["column1", "column2", "y"], + ["column1", "column2"], + ), + ], + ids=["empty", "exact match", "no similar", "one similar", "multiple similar"], +) +def test_should_get_similar_strings(string: str, valid_strings: list[str], expected: list[str]) -> None: + assert _get_similar_strings(string, valid_strings) == expected diff --git a/tests/safeds/_validation/test_get_similar_columns.py b/tests/safeds/_validation/test_get_similar_columns.py deleted file mode 100644 index 1d6505a0b..000000000 --- a/tests/safeds/_validation/test_get_similar_columns.py +++ /dev/null @@ -1,39 +0,0 @@ -import pytest - -from safeds._validation._check_columns_exist_module import _get_similar_column_names -from safeds.data.tabular.containers import Table - - -@pytest.mark.parametrize( - ("table", "name", "expected"), - [ - ( - Table({}), - "column1", - [], - ), - ( - Table({"column1": [], "column2": []}), - "column1", - ["column1"], - ), - ( - Table({"column1": [], "column2": [], "column3": []}), - "dissimilar", - [], - ), - ( - Table({"column1": [], "x": [], "y": []}), - "cilumn1", - ["column1"], - ), - ( - Table({"column1": [], "column2": [], "y": []}), - "cilumn1", - ["column1", "column2"], - ), - ], - ids=["empty table", "exact match", "no similar", "one similar", "multiple similar"], -) -def test_should_get_similar_column_names(table: Table, name: str, expected: 
list[str]) -> None: - assert _get_similar_column_names(table.schema, name) == expected diff --git a/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py new file mode 100644 index 000000000..9791c3e08 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_column/test_from_lazy_frame.py @@ -0,0 +1,24 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Column + + +def test_should_store_the_name() -> None: + frame = pl.LazyFrame({"col1": []}) + assert Column._from_polars_lazy_frame("col1", frame).name == "col1" + + +@pytest.mark.parametrize( + ("frame", "expected"), + [ + (pl.LazyFrame({"col1": []}), []), + (pl.LazyFrame({"col1": [True]}), [True]), + ], + ids=[ + "empty", + "non-empty", + ], +) +def test_should_store_the_data(frame: pl.LazyFrame, expected: list) -> None: + assert list(Column._from_polars_lazy_frame("col1", frame)) == expected diff --git a/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py b/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py index 8a5348f15..59391852c 100644 --- a/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py +++ b/tests/safeds/data/tabular/containers/_column/test_from_polars_series.py @@ -20,5 +20,5 @@ def test_should_store_the_name() -> None: "non-empty", ], ) -def test_should_store_the_data(series: pl.Series, expected: Column) -> None: +def test_should_store_the_data(series: pl.Series, expected: list) -> None: assert list(Column._from_polars_series(series)) == expected diff --git a/tests/safeds/data/tabular/containers/_column/test_init.py b/tests/safeds/data/tabular/containers/_column/test_init.py index fa45c4dcf..b64b93759 100644 --- a/tests/safeds/data/tabular/containers/_column/test_init.py +++ b/tests/safeds/data/tabular/containers/_column/test_init.py @@ -14,7 +14,7 @@ def test_should_store_the_name() -> None: [ (Column("col1", []), []), 
(Column("col1", [1]), [1]), - (Column("col1", [1], type_=ColumnType.string()), ["1"]), + (Column("col1", [1], type=ColumnType.string()), ["1"]), ], ids=[ "empty", @@ -31,7 +31,7 @@ def test_should_store_the_data(column: Column, expected: list) -> None: [ (Column("col1", []), ColumnType.null()), (Column("col1", [1]), ColumnType.int64()), - (Column("col1", [1], type_=ColumnType.string()), ColumnType.string()), + (Column("col1", [1], type=ColumnType.string()), ColumnType.string()), ], ids=[ "empty", diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py index 7cc40c878..0a746ad90 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_abs.py @@ -25,9 +25,8 @@ "None", ], ) -class TestShouldReturnAbsoluteValue: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: abs(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_absolute_value(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: abs(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.abs(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the `math` namespace. 
diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py index 32dce3d20..01e16721f 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_ceil.py @@ -27,9 +27,8 @@ "None", ], ) -class TestShouldReturnCeiling: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_ceiling(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.ceil(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.ceil(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the `math` namespace. 
diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py index dcab1d935..fd2586f9a 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_datetime.py @@ -1,14 +1,14 @@ -from datetime import datetime +from datetime import UTC, datetime import pytest from safeds._typing import _ConvertibleToIntCell -from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers import Cell, Column from tests.helpers import assert_cell_operation_works @pytest.mark.parametrize( - ("year", "month", "day", "hour", "minute", "second", "microsecond", "expected"), + ("year", "month", "day", "hour", "minute", "second", "microsecond", "time_zone", "expected"), [ ( 1, @@ -18,6 +18,7 @@ 5, 6, 7, + None, datetime(1, 2, 3, 4, 5, 6, 7), # noqa: DTZ001 ), ( @@ -28,34 +29,37 @@ Cell.constant(5), Cell.constant(6), Cell.constant(7), + None, datetime(1, 2, 3, 4, 5, 6, 7), # noqa: DTZ001 ), # invalid year - (None, 2, 3, 4, 5, 6, 7, None), + (None, 2, 3, 4, 5, 6, 7, None, None), # invalid month - (1, None, 3, 4, 5, 6, 7, None), - (1, 0, 3, 4, 5, 6, 7, None), - (1, 13, 3, 4, 5, 6, 7, None), + (1, None, 3, 4, 5, 6, 7, None, None), + (1, 0, 3, 4, 5, 6, 7, None, None), + (1, 13, 3, 4, 5, 6, 7, None, None), # invalid day - (1, 2, None, 4, 5, 6, 7, None), - (1, 2, 0, 4, 5, 6, 7, None), - (1, 2, 32, 4, 5, 6, 7, None), + (1, 2, None, 4, 5, 6, 7, None, None), + (1, 2, 0, 4, 5, 6, 7, None, None), + (1, 2, 32, 4, 5, 6, 7, None, None), # invalid hour - (1, 2, 3, None, 5, 6, 7, None), - (1, 2, 3, -1, 5, 6, 7, None), - (1, 2, 3, 24, 5, 6, 7, None), + (1, 2, 3, None, 5, 6, 7, None, None), + (1, 2, 3, -1, 5, 6, 7, None, None), + (1, 2, 3, 24, 5, 6, 7, None, None), # invalid minute - (1, 2, 3, 4, None, 6, 7, None), - (1, 2, 3, 4, -1, 6, 7, None), - (1, 2, 3, 4, 60, 6, 7, None), + (1, 2, 3, 4, None, 6, 7, None, None), + (1, 2, 3, 4, 
-1, 6, 7, None, None), + (1, 2, 3, 4, 60, 6, 7, None, None), # invalid second - (1, 2, 3, 4, 5, None, 7, None), - (1, 2, 3, 4, 5, -1, 7, None), - (1, 2, 3, 4, 5, 60, 7, None), + (1, 2, 3, 4, 5, None, 7, None, None), + (1, 2, 3, 4, 5, -1, 7, None, None), + (1, 2, 3, 4, 5, 60, 7, None, None), # invalid microsecond - (1, 2, 3, 4, 5, 6, None, None), - (1, 2, 3, 4, 5, 6, -1, None), - (1, 2, 3, 4, 5, 6, 1_000_000, None), + (1, 2, 3, 4, 5, 6, None, None, None), + (1, 2, 3, 4, 5, 6, -1, None, None), + (1, 2, 3, 4, 5, 6, 1_000_000, None, None), + # with time zone + (1, 2, 3, 4, 5, 6, 7, "UTC", datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC)), ], ids=[ "int components", @@ -79,6 +83,7 @@ "microsecond is None", "microsecond is too low", "microsecond is too high", + "with time zone", ], ) def test_should_return_datetime( @@ -89,6 +94,7 @@ def test_should_return_datetime( minute: _ConvertibleToIntCell, second: _ConvertibleToIntCell, microsecond: _ConvertibleToIntCell, + time_zone: str | None, expected: datetime, ) -> None: assert_cell_operation_works( @@ -101,6 +107,13 @@ def test_should_return_datetime( minute=minute, second=second, microsecond=microsecond, + time_zone=time_zone, ), expected, ) + + +def test_should_raise_if_time_zone_is_invalid() -> None: + column = Column("a", [None]) + with pytest.raises(ValueError, match="Invalid time zone"): + column.transform(lambda _: Cell.datetime(1, 2, 3, time_zone="invalid")) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py index 023cdf615..df3bb8f83 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_equals.py @@ -34,31 +34,31 @@ _LazyCell(pl.col("a")), True, ), - # not equal (different constant value) + # not equal (different constant values) ( Cell.constant(1), Cell.constant(2), False, ), - # not equal (different constant type) + # not equal (different constant 
types) ( Cell.constant(1), Cell.constant("1"), False, ), - # not equal (different date, int) + # not equal (different dates, int) ( Cell.date(2025, 1, 15), Cell.date(2024, 1, 15), False, ), - # not equal (different date, column) + # not equal (different dates, column) ( Cell.date(_LazyCell(pl.col("a")), 1, 15), Cell.date(_LazyCell(pl.col("b")), 1, 15), False, ), - # not equal (different column) + # not equal (different columns) ( _LazyCell(pl.col("a")), _LazyCell(pl.col("b")), @@ -78,11 +78,11 @@ "equal (date, column)", "equal (column)", # Not equal - "not equal (different constant value)", - "not equal (different constant type)", - "not equal (different date, int)", - "not equal (different date, column)", - "not equal (different column)", + "not equal (different constant values)", + "not equal (different constant types)", + "not equal (different dates, int)", + "not equal (different dates, column)", + "not equal (different columns)", "not equal (different cell kinds)", ], ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py index 73ecf8e85..e8afaebdc 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_floor.py @@ -27,9 +27,8 @@ "None", ], ) -class TestShouldReturnFloor: - def test_dunder_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: math.floor(cell), expected, type_if_none=ColumnType.float64()) +def test_should_return_floor(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: math.floor(cell), expected, type_if_none=ColumnType.float64()) - def test_named_method(self, value: float | None, expected: float | None) -> None: - assert_cell_operation_works(value, lambda cell: cell.floor(), expected, type_if_none=ColumnType.float64()) + +# The corresponding named method is inside the 
`math` namespace. diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py index 7c5a6d312..55c00fa4f 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_hash.py @@ -41,27 +41,27 @@ def test_should_return_same_hash_in_different_processes( @pytest.mark.parametrize( ("cell_1", "cell_2"), [ - # different constant value + # different constant values ( Cell.constant(1), Cell.constant(2), ), - # different constant type + # different constant types ( Cell.constant(1), Cell.constant("1"), ), - # different date, int + # different dates, int ( Cell.date(2025, 1, 15), Cell.date(2024, 1, 15), ), - # different date, column + # different dates, column ( Cell.date(_LazyCell(pl.col("a")), 1, 15), Cell.date(_LazyCell(pl.col("b")), 1, 15), ), - # different column + # different columns ( _LazyCell(pl.col("a")), _LazyCell(pl.col("b")), @@ -73,11 +73,11 @@ def test_should_return_same_hash_in_different_processes( ), ], ids=[ - "different constant value", - "different constant type", - "different date, int", - "different date, column", - "different column", + "different constant values", + "different constant types", + "different dates, int", + "different dates, column", + "different columns", "different cell kinds", ], ) diff --git a/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py index 91313f5ad..44aad275e 100644 --- a/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py +++ b/tests/safeds/data/tabular/containers/_lazy_cell/test_repr.py @@ -10,11 +10,11 @@ [ ( Cell.constant(1), - "dyn int: 1", + "_LazyCell(dyn int: 1)", ), ( _LazyCell(pl.col("a")), - 'col("a")', + '_LazyCell(col("a"))', ), ], ids=[ @@ -24,4 +24,4 @@ ) def test_should_return_a_string_representation(cell: Cell, expected: str) -> None: # We do not care about the exact string 
representation, this is only for debugging - assert expected in repr(cell) + assert repr(cell) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py b/tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py deleted file mode 100644 index 11bff4b38..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_equals.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any - -import polars as pl -import pytest - -from safeds.data.tabular.containers import StringCell, Table -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_return_whether_two_cells_are_equal(cell1: StringCell, cell2: StringCell, expected: bool) -> None: - assert (cell1._equals(cell2)) == expected - - -def test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyStringCell(pl.col("a")) - assert (cell._equals(cell)) is True - - -@pytest.mark.parametrize( - ("cell", "other"), - [ - (_LazyStringCell(pl.col("a")), None), - (_LazyStringCell(pl.col("a")), Table({})), - ], - ids=[ - "Cell vs. None", - "Cell vs. 
Table", - ], -) -def test_should_return_not_implemented_if_other_is_not_cell(cell: StringCell, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py b/tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py deleted file mode 100644 index f5962be36..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_hash.py +++ /dev/null @@ -1,25 +0,0 @@ -import polars as pl -import pytest - -from safeds.data.tabular.containers import StringCell -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell - - -def test_should_be_deterministic() -> None: - cell = _LazyStringCell(pl.col("a")) - assert hash(cell) == 8162512882156938440 - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("a")), True), - (_LazyStringCell(pl.col("a")), _LazyStringCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_be_good_hash(cell1: StringCell, cell2: StringCell, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py b/tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py deleted file mode 100644 index d01d9b0f7..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_sizeof.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys - -import polars as pl - -from safeds.data.tabular.containers._lazy_string_cell import _LazyStringCell - - -def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyStringCell(pl.col("a")) - assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_float.py b/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_float.py deleted file mode 100644 index f9ea7ef9c..000000000 --- 
a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_float.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("string", "expected"), - [ - ("", None), - ("11", 11), - ("11.5", 11.5), - ("10e-1", 1.0), - ("abc", None), - ], - ids=[ - "empty", - "integer", - "float", - "scientific notation", - "invalid string", - ], -) -def test_should_parse_float(string: str, expected: bool) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.to_float(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_century.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_century.py deleted file mode 100644 index 2d36808b6..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_century.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (18, datetime.datetime(1800, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (21, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_day(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.century(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_date_to_string.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_date_to_string.py deleted file mode 100644 index 7418c167e..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_date_to_string.py +++ /dev/null @@ -1,45 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ("2022-01-09", datetime.date(2022, 1, 9), "%F"), - ("2022/01/09", datetime.date(2022, 1, 9), "%Y/%m/%d"), - ], - ids=[ 
- "ISO date", - "ISO date format", - ], -) -def test_should_parse_date_to_string(input_date: datetime.date, expected: bool, format_string: str) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.date_to_string(format_string), expected) - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ( - "Invalid format string", - datetime.date(2022, 1, 9), - "%9", - ), - ( - "Invalid format string", - datetime.date(2022, 1, 9), - "%Y%", - ), - ], - ids=["ISO datetime false", "ISO datetime false % at end"], -) -def test_should_raise_value_error_when_input_date_is_invalid( - input_date: datetime.date, - expected: str, - format_string: str, -) -> None: - with pytest.raises(ValueError, match=expected): - assert_cell_operation_works(input_date, lambda cell: cell.dt.date_to_string(format_string), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_datetime_to_string.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_datetime_to_string.py deleted file mode 100644 index 942fef5bc..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_datetime_to_string.py +++ /dev/null @@ -1,34 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date", "format_string"), - [ - ("2022/01/09 23:29:01", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC), "%Y/%m/%d %H:%M:%S"), - ("2022:01:09 23/29/01", datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC), "%Y:%m:%d %H/%M/%S"), - ], - ids=[ - "ISO datetime", - "ISO datetime format", - ], -) -def test_should_parse_date_to_string(input_date: datetime.date, expected: bool, format_string: str) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.datetime_to_string(format_string), expected) - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - ("Invalid format string", 
datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - ], - ids=[ - "ISO datetime false", - ], -) -def test_should_raise_value_error_when_input_date_is_invalid(input_date: datetime.date, expected: str) -> None: - with pytest.raises(ValueError, match=expected): - assert_cell_operation_works(input_date, lambda cell: cell.dt.datetime_to_string("%9"), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_day.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_day.py deleted file mode 100644 index afa9c588b..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_day.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (9, datetime.datetime(2022, 1, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (1, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_day(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.day(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py deleted file mode 100644 index 1dfc9e181..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_equals.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any - -import polars as pl -import pytest - -from safeds.data.tabular.containers import Table, TemporalCell -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("a")), True), - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_return_whether_two_cells_are_equal(cell1: 
TemporalCell, cell2: TemporalCell, expected: bool) -> None: - assert (cell1._equals(cell2)) == expected - - -def test_should_return_true_if_objects_are_identical() -> None: - cell = _LazyTemporalCell(pl.col("a")) - assert (cell._equals(cell)) is True - - -@pytest.mark.parametrize( - ("cell", "other"), - [ - (_LazyTemporalCell(pl.col("a")), None), - (_LazyTemporalCell(pl.col("a")), Table({})), - ], - ids=[ - "Cell vs. None", - "Cell vs. Table", - ], -) -def test_should_return_not_implemented_if_other_is_not_cell(cell: TemporalCell, other: Any) -> None: - assert (cell._equals(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py deleted file mode 100644 index c040d04b6..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_hash.py +++ /dev/null @@ -1,25 +0,0 @@ -import polars as pl -import pytest - -from safeds.data.tabular.containers import TemporalCell -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell - - -def test_should_be_deterministic() -> None: - cell = _LazyTemporalCell(pl.col("a")) - assert hash(cell) == 8162512882156938440 - - -@pytest.mark.parametrize( - ("cell1", "cell2", "expected"), - [ - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("a")), True), - (_LazyTemporalCell(pl.col("a")), _LazyTemporalCell(pl.col("b")), False), - ], - ids=[ - "equal", - "different", - ], -) -def test_should_be_good_hash(cell1: TemporalCell, cell2: TemporalCell, expected: bool) -> None: - assert (hash(cell1) == hash(cell2)) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_month.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_month.py deleted file mode 100644 index 626dff546..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_month.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - 
-from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (3, datetime.datetime(2022, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (1, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_month(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.month(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py deleted file mode 100644 index 89ff99c06..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_sizeof.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys - -import polars as pl - -from safeds.data.tabular.containers._lazy_temporal_cell import _LazyTemporalCell - - -def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyTemporalCell(pl.col("a")) - assert sys.getsizeof(cell) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_week.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_week.py deleted file mode 100644 index 3a6c7fd60..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_week.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (10, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (52, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_week(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.week(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_weekday.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_weekday.py deleted file mode 
100644 index 9db08b4fc..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_weekday.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (4, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (6, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_weekday(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.weekday(), expected) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_year.py b/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_year.py deleted file mode 100644 index e35810e52..000000000 --- a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/test_year.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime - -import pytest - -from tests.helpers import assert_cell_operation_works - - -@pytest.mark.parametrize( - ("expected", "input_date"), - [ - (2023, datetime.datetime(2023, 3, 9, 23, 29, 1, tzinfo=datetime.UTC)), - (2022, datetime.date(2022, 1, 1)), - ], - ids=[ - "ISO datetime", - "ISO date", - ], -) -def test_get_year(input_date: datetime.date, expected: bool) -> None: - assert_cell_operation_works(input_date, lambda cell: cell.dt.year(), expected) diff --git a/tests/safeds/_validation/__init__.py b/tests/safeds/data/tabular/query/__init__.py similarity index 100% rename from tests/safeds/_validation/__init__.py rename to tests/safeds/data/tabular/query/__init__.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__init__.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/__init__.py rename to tests/safeds/data/tabular/query/_lazy_datetime_operations/__init__.py diff --git 
a/tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..4ed1678cb --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[time] + 1565184979992361175 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py new file mode 100644 index 000000000..3a32f1f6e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_century.py @@ -0,0 +1,39 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(100, 12, 31, tzinfo=UTC), 1), + (datetime(101, 1, 1, tzinfo=UTC), 2), + (date(1, 1, 1), 1), + (date(100, 12, 31), 1), + (date(101, 1, 1), 2), + (None, None), + ], + ids=[ + "datetime - first day of first century", + "datetime - last day of first century", + "datetime - first day of second century", + "date - first day of first century", + "date - last day of first century", + "date - first day of second century", + "None", + ], +) +def test_should_extract_century( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.century(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py 
b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py new file mode 100644 index 000000000..0439b61a7 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_date.py @@ -0,0 +1,29 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), date(1, 2, 3)), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_extract_date( + value: datetime | None, + expected: date | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.date(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py new file mode 100644 index 000000000..4c53d8e2d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), 3), + (date(1, 2, 3), 3), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_extract_day( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py new file mode 100644 index 000000000..cbd6470f5 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_week.py @@ 
-0,0 +1,55 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 1, 2, tzinfo=UTC), 2), + (datetime(1, 1, 3, tzinfo=UTC), 3), + (datetime(1, 1, 4, tzinfo=UTC), 4), + (datetime(1, 1, 5, tzinfo=UTC), 5), + (datetime(1, 1, 6, tzinfo=UTC), 6), + (datetime(1, 1, 7, tzinfo=UTC), 7), + (date(1, 1, 1), 1), + (date(1, 1, 2), 2), + (date(1, 1, 3), 3), + (date(1, 1, 4), 4), + (date(1, 1, 5), 5), + (date(1, 1, 6), 6), + (date(1, 1, 7), 7), + (None, None), + ], + ids=[ + "datetime - Monday", + "datetime - Tuesday", + "datetime - Wednesday", + "datetime - Thursday", + "datetime - Friday", + "datetime - Saturday", + "datetime - Sunday", + "date - Monday", + "date - Tuesday", + "date - Wednesday", + "date - Thursday", + "date - Friday", + "date - Saturday", + "date - Sunday", + "None", + ], +) +def test_should_extract_day_of_week( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day_of_week(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py new file mode 100644 index 000000000..eac185e1b --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_day_of_year.py @@ -0,0 +1,39 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 12, 31, tzinfo=UTC), 365), + (datetime(4, 12, 31, tzinfo=UTC), 366), + (date(1, 1, 1), 1), + (date(1, 12, 31), 365), + (date(4, 
12, 31), 366), + (None, None), + ], + ids=[ + "datetime - first", + "datetime - last in non-leap year", + "datetime - last in leap year", + "date - first", + "date - last in non-leap year", + "date - last in leap year", + "None", + ], +) +def test_should_extract_day_of_year( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.day_of_year(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py new file mode 100644 index 000000000..f21dbb7a6 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DatetimeOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (time) + ( + Cell.time(1, 0, 0).dt, + Cell.time(1, 0, 0).dt, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("a")).dt, + True, + ), + # not equal (different times) + ( + Cell.time(1, 0, 0).dt, + Cell.time(2, 0, 0).dt, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("b")).dt, + False, + ), + # not equal (different cell kinds) + ( + Cell.time(1, 0, 0).dt, + _LazyCell(pl.col("a")).dt, + False, + ), + ], + ids=[ + # Equal + "equal (time)", + "equal (column)", + # Not equal + "not equal (different times)", + "not equal (different columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: DatetimeOperations, + ops_2: DatetimeOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + 
"ops", + [ + Cell.time(1, 0, 0).dt, + _LazyCell(pl.col("a")).dt, + ], + ids=[ + "time", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: DatetimeOperations) -> None: + assert (ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.time(1, 0, 0).dt, None), + (Cell.time(1, 0, 0).dt, Column("col1", [1])), + ], + ids=[ + "DatetimeOperations vs. None", + "DatetimeOperations vs. Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: DatetimeOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py new file mode 100644 index 000000000..7af6e3776 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DatetimeOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.time(1, 0, 0).dt, + lambda: _LazyCell(pl.col("a")).dt, + ], + ids=[ + "time", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], DatetimeOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], DatetimeOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different times + ( + Cell.time(1, 0, 0).dt, + Cell.time(2, 0, 0).dt, + ), + # different columns + ( + 
_LazyCell(pl.col("a")).dt, + _LazyCell(pl.col("b")).dt, + ), + # different cell kinds + ( + Cell.time(1, 0, 0).dt, + _LazyCell(pl.col("a")).dt, + ), + ], + ids=[ + "different times", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: DatetimeOperations, ops_2: DatetimeOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py new file mode 100644 index 000000000..089c8ddc1 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_hour.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 4), + (time(4, 5, 6, 7), 4), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_hour( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.hour(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py new file mode 100644 index 000000000..2401c8174 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_is_in_leap_year.py @@ -0,0 +1,43 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1999, 1, 1, tzinfo=UTC), False), + (datetime(1996, 1, 1, tzinfo=UTC), True), + (datetime(1900, 1, 1, tzinfo=UTC), False), + 
(datetime(2000, 1, 1, tzinfo=UTC), True), + (date(1999, 1, 1), False), + (date(1996, 1, 1), True), + (date(1900, 1, 1), False), + (date(2000, 1, 1), True), + (None, None), + ], + ids=[ + "datetime - not divisible by 4", + "datetime - divisible by 4", + "datetime - divisible by 100", + "datetime - divisible by 400", + "date - not divisible by 4", + "date - divisible by 4", + "date - divisible by 100", + "date - divisible by 400", + "None", + ], +) +def test_should_check_if_is_in_leap_date( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.is_in_leap_year(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py new file mode 100644 index 000000000..3cb03fcac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_microsecond.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 7), + (time(4, 5, 6, 7), 7), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_microsecond( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.microsecond(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py new file mode 100644 index 000000000..c7f790f10 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millennium.py @@ -0,0 +1,39 @@ +from datetime import 
UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1000, 12, 31, tzinfo=UTC), 1), + (datetime(1001, 1, 1, tzinfo=UTC), 2), + (date(1, 1, 1), 1), + (date(1000, 12, 31), 1), + (date(1001, 1, 1), 2), + (None, None), + ], + ids=[ + "datetime - first day of first millennium", + "datetime - last day of first millennium", + "datetime - first day of second millennium", + "date - first day of first millennium", + "date - last day of first millennium", + "date - first day of second millennium", + "None", + ], +) +def test_should_extract_millennium( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.millennium(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py new file mode 100644 index 000000000..134211ee4 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_millisecond.py @@ -0,0 +1,35 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7000, tzinfo=UTC), 7), + (datetime(1, 2, 3, 4, 5, 6, 999, tzinfo=UTC), 0), + (time(4, 5, 6, 7000), 7), + (time(4, 5, 6, 999), 0), + (None, None), + ], + ids=[ + "datetime - with milliseconds", + "datetime - without full milliseconds", + "time - with milliseconds", + "time - without full milliseconds", + "None", + ], +) +def test_should_extract_millisecond( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( 
+ value, + lambda cell: cell.dt.millisecond(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py new file mode 100644 index 000000000..b3bf1de5f --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_minute.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 5), + (time(4, 5, 6, 7), 5), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_minute( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.minute(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py new file mode 100644 index 000000000..6236f4a63 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_month.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), 2), + (date(1, 2, 3), 2), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_extract_month( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.month(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git 
a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py new file mode 100644 index 000000000..af5dce5ac --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_quarter.py @@ -0,0 +1,59 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 1, 1, tzinfo=UTC), 1), + (datetime(1, 3, 31, tzinfo=UTC), 1), + (datetime(1, 4, 1, tzinfo=UTC), 2), + (datetime(1, 6, 30, tzinfo=UTC), 2), + (datetime(1, 7, 1, tzinfo=UTC), 3), + (datetime(1, 9, 30, tzinfo=UTC), 3), + (datetime(1, 10, 1, tzinfo=UTC), 4), + (datetime(1, 12, 31, tzinfo=UTC), 4), + (date(1, 1, 1), 1), + (date(1, 3, 31), 1), + (date(1, 4, 1), 2), + (date(1, 6, 30), 2), + (date(1, 7, 1), 3), + (date(1, 9, 30), 3), + (date(1, 10, 1), 4), + (date(1, 12, 31), 4), + (None, None), + ], + ids=[ + "datetime - first day of first quarter", + "datetime - last day of first quarter", + "datetime - first day of second quarter", + "datetime - last day of second quarter", + "datetime - first day of third quarter", + "datetime - last day of third quarter", + "datetime - first day of fourth quarter", + "datetime - last day of fourth quarter", + "date - first day of first quarter", + "date - last day of first quarter", + "date - first day of second quarter", + "date - last day of second quarter", + "date - first day of third quarter", + "date - last day of third quarter", + "date - first day of fourth quarter", + "date - last day of fourth quarter", + "None", + ], +) +def test_should_extract_quarter( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.quarter(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git 
a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py new file mode 100644 index 000000000..a201398e0 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_replace.py @@ -0,0 +1,127 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + +DATETIME = datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) +DATE = date(1, 2, 3) + + +@pytest.mark.parametrize( + ("value", "year", "month", "day", "hour", "minute", "second", "microsecond", "expected"), + [ + # datetime - change year + (DATETIME, 10, None, None, None, None, None, None, datetime(10, 2, 3, 4, 5, 6, 7, tzinfo=UTC)), + # datetime - change month (valid) + (DATETIME, None, 10, None, None, None, None, None, datetime(1, 10, 3, 4, 5, 6, 7, tzinfo=UTC)), + # datetime - change month (invalid) + (DATETIME, None, 13, None, None, None, None, None, None), + # datetime - change day (valid) + (DATETIME, None, None, 10, None, None, None, None, datetime(1, 2, 10, 4, 5, 6, 7, tzinfo=UTC)), + # datetime - change day (invalid) + (DATETIME, None, None, 32, None, None, None, None, None), + # datetime - change hour (valid) + (DATETIME, None, None, None, 10, None, None, None, datetime(1, 2, 3, 10, 5, 6, 7, tzinfo=UTC)), + # datetime - change hour (invalid) + (DATETIME, None, None, None, 24, None, None, None, None), + # datetime - change minute (valid) + (DATETIME, None, None, None, None, 10, None, None, datetime(1, 2, 3, 4, 10, 6, 7, tzinfo=UTC)), + # datetime - change minute (invalid) + (DATETIME, None, None, None, None, 60, None, None, None), + # datetime - change second (valid) + (DATETIME, None, None, None, None, None, 10, None, datetime(1, 2, 3, 4, 5, 10, 7, tzinfo=UTC)), + # datetime - change second (invalid) + (DATETIME, None, None, None, None, None, 60, None, None), + # datetime - change 
microsecond (valid) + (DATETIME, None, None, None, None, None, None, 10, datetime(1, 2, 3, 4, 5, 6, 10, tzinfo=UTC)), + # datetime - change microsecond (invalid) + (DATETIME, None, None, None, None, None, None, 1000000, None), + # date - change year + (DATE, 10, None, None, None, None, None, None, date(10, 2, 3)), + # date - change month (valid) + (DATE, None, 10, None, None, None, None, None, date(1, 10, 3)), + # date - change month (invalid) + (DATE, None, 13, None, None, None, None, None, None), + # date - change day (valid) + (DATE, None, None, 10, None, None, None, None, date(1, 2, 10)), + # date - change day (invalid) + (DATE, None, None, 32, None, None, None, None, None), + # date - change hour (valid) + (DATE, None, None, None, 10, None, None, None, DATE), + # date - change hour (invalid) + (DATE, None, None, None, 24, None, None, None, DATE), + # date - change minute (valid) + (DATE, None, None, None, None, 10, None, None, DATE), + # date - change minute (invalid) + (DATE, None, None, None, None, 60, None, None, DATE), + # date - change second (valid) + (DATE, None, None, None, None, None, 10, None, DATE), + # date - change second (invalid) + (DATE, None, None, None, None, None, 60, None, DATE), + # date - change microsecond (valid) + (DATE, None, None, None, None, None, None, 10, DATE), + # date - change microsecond (invalid) + (DATE, None, None, None, None, None, None, 1000000, DATE), + # None + (None, None, None, None, None, None, None, None, None), + ], + ids=[ + # datetime + "datetime - change year", + "datetime - change month (valid)", + "datetime - change month (invalid)", + "datetime - change day (valid)", + "datetime - change day (invalid)", + "datetime - change hour (valid)", + "datetime - change hour (invalid)", + "datetime - change minute (valid)", + "datetime - change minute (invalid)", + "datetime - change second (valid)", + "datetime - change second (invalid)", + "datetime - change microsecond (valid)", + "datetime - change microsecond 
(invalid)", + # date + "date - change year", + "date - change month (valid)", + "date - change month (invalid)", + "date - change day (valid)", + "date - change day (invalid)", + "date - change hour (valid)", + "date - change hour (invalid)", + "date - change minute (valid)", + "date - change minute (invalid)", + "date - change second (valid)", + "date - change second (invalid)", + "date - change microsecond (valid)", + "date - change microsecond (invalid)", + # None + "None", + ], +) +def test_should_replace_components( + value: datetime | date | None, + year: int | None, + month: int | None, + day: int | None, + hour: int | None, + minute: int | None, + second: int | None, + microsecond: int | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.replace( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + ), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py new file mode 100644 index 000000000..238eaa189 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_repr.py @@ -0,0 +1,22 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DatetimeOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).dt, + '_LazyDatetimeOperations(col("a"))', + ), + ], + ids=[ + "column", + ], +) +def test_should_return_a_string_representation(ops: DatetimeOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py 
b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py new file mode 100644 index 000000000..d0ee3c0c3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_second.py @@ -0,0 +1,31 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), 6), + (time(4, 5, 6, 7), 6), + (None, None), + ], + ids=[ + "datetime", + "time", + "None", + ], +) +def test_should_extract_second( + value: datetime | time | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.second(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py new file mode 100644 index 000000000..d16bc1cf2 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DatetimeOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.time(1, 0, 0).dt, + _LazyCell(pl.col("a")).dt, + ], + ids=[ + "time", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: DatetimeOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py new file mode 100644 index 000000000..8cc48623f --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_str.py @@ -0,0 +1,22 @@ +import polars as pl +import pytest 
+ +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DatetimeOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).dt, + '(col("a")).dt', + ), + ], + ids=[ + "column", + ], +) +def test_should_return_a_string_representation(ops: DatetimeOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py new file mode 100644 index 000000000..c58d5321d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_time.py @@ -0,0 +1,29 @@ +from datetime import UTC, datetime, time + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC), time(4, 5, 6, 7)), + (None, None), + ], + ids=[ + "datetime", + "None", + ], +) +def test_should_extract_time( + value: datetime | None, + expected: time | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.time(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py new file mode 100644 index 000000000..a5041f376 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_to_string.py @@ -0,0 +1,268 @@ +from datetime import UTC, date, datetime, time + +import pytest + +from safeds.data.tabular.containers import Column +from safeds.data.tabular.typing import ColumnType +from safeds.exceptions import LazyComputationError +from tests.helpers import assert_cell_operation_works + +DATETIME = 
datetime(1, 2, 3, 4, 5, 6, 7, tzinfo=UTC) +DATE = date(1, 2, 3) +TIME = time(4, 5, 6, 7) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, 4, 5, 6, 7), "0001-02-03T04:05:06.000007"), # noqa: DTZ001 + (DATETIME, "0001-02-03T04:05:06.000007+00:00"), + (DATE, "0001-02-03"), + (TIME, "04:05:06.000007"), + (None, None), + ], + ids=[ + "datetime without time zone", + "datetime with time zone", + "date", + "time", + "None", + ], +) +def test_should_handle_iso_8601(value: datetime | date | time | None, expected: str | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format="iso"), + expected, + type_if_none=ColumnType.datetime(), + ) + + +@pytest.mark.parametrize( + "value", + [ + DATETIME, + DATE, + ], + ids=[ + "datetime", + "date", + ], +) +class TestDateSpecifiers: + @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{Y}", "0001"), + ("{_Y}", " 1"), + ("{^Y}", "1"), + ("{Y99}", "01"), + ("{_Y99}", " 1"), + ("{^Y99}", "1"), + ("{M}", "02"), + ("{_M}", " 2"), + ("{^M}", "2"), + ("{M-full}", "February"), + ("{M-short}", "Feb"), + ("{W}", "05"), + ("{_W}", " 5"), + ("{^W}", "5"), + ("{D}", "03"), + ("{_D}", " 3"), + ("{^D}", "3"), + ("{DOW}", "6"), + ("{DOW-full}", "Saturday"), + ("{DOW-short}", "Sat"), + ("{DOY}", "034"), + ("{_DOY}", " 34"), + ("{^DOY}", "34"), + ], + ids=[ + "{Y}", + "{_Y}", + "{^Y}", + "{Y99}", + "{_Y99}", + "{^Y99}", + "{M}", + "{_M}", + "{^M}", + "{M-full}", + "{M-short}", + "{W}", + "{_W}", + "{^W}", + "{D}", + "{_D}", + "{^D}", + "{DOW}", + "{DOW-full}", + "{DOW-short}", + "{DOY}", + "{_DOY}", + "{^DOY}", + ], + ) + def test_should_be_replaced_with_correct_string(self, value: datetime | date, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + "value", + [ + DATETIME, + TIME, + ], + ids=[ + "datetime", + "time", + ], +) +class TestTimeSpecifiers: 
+ @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{h}", "04"), + ("{_h}", " 4"), + ("{^h}", "4"), + ("{h12}", "04"), + ("{_h12}", " 4"), + ("{^h12}", "4"), + ("{m}", "05"), + ("{_m}", " 5"), + ("{^m}", "5"), + ("{s}", "06"), + ("{_s}", " 6"), + ("{^s}", "6"), + ("{.f}", ".000007"), + ("{ms}", "000"), + ("{us}", "000007"), + ("{ns}", "000007000"), + ("{AM/PM}", "AM"), + ("{am/pm}", "am"), + ], + ids=[ + "{h}", + "{_h}", + "{^h}", + "{h12}", + "{_h12}", + "{^h12}", + "{m}", + "{_m}", + "{^m}", + "{s}", + "{_s}", + "{^s}", + "{.f}", + "{ms}", + "{us}", + "{ns}", + "{AM/PM}", + "{am/pm}", + ], + ) + def test_should_be_replaced_with_correct_string(self, value: datetime | time, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + "value", + [ + DATETIME, + ], + ids=[ + "datetime", + ], +) +class TestDateTimeSpecifiers: + @pytest.mark.parametrize( + ("format_", "expected"), + [ + ("{z}", "+0000"), + ("{:z}", "+00:00"), + ("{u}", "-62132730894"), + ], + ids=[ + "{z}", + "{:z}", + "{u}", + ], + ) + def test_should_be_replaced_with_correct_string(self, value: datetime, format_: str, expected: str) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + +@pytest.mark.parametrize( + ("format_", "expected"), + [ + ("\\", "\\"), + ("\\\\", "\\"), + ("\\{", "{"), + ("%", "%"), + ("\n", "\n"), + ("\t", "\t"), + ], + ids=[ + "backslash at end", + "escaped backslash", + "escaped open curly brace", + "percent", + "newline", + "tab", + ], +) +def test_should_handle_escape_sequences(format_: str, expected: date | time | None) -> None: + assert_cell_operation_works( + DATETIME, + lambda cell: cell.dt.to_string(format=format_), + expected, + ) + + +def test_should_raise_for_unclosed_specifier() -> None: + column = Column("a", [DATETIME]) + with pytest.raises(ValueError, match="Unclosed 
specifier"): + column.transform(lambda cell: cell.dt.to_string(format="{Y")) + + +def test_should_raise_for_globally_invalid_specifier() -> None: + column = Column("a", [DATETIME]) + with pytest.raises(ValueError, match="Invalid specifier"): + column.transform(lambda cell: cell.dt.to_string(format="{invalid}")) + + +@pytest.mark.parametrize( + ("value", "format_"), + [ + (DATE, "{h}"), + pytest.param( + TIME, + "{Y}", + marks=pytest.mark.skip("polars panics in this case (https://github.com/pola-rs/polars/issues/19853)."), + ), + ], + ids=[ + "invalid for date", + "invalid for time", + ], +) +def test_should_raise_for_specifier_that_is_invalid_for_type(value: date | time | None, format_: str) -> None: + # TODO: This is not the ideal behavior. Once https://github.com/Safe-DS/Library/issues/860 is resolved, we should + # do our own validation to raise an error that knows our own specifiers. + column = Column("a", [value]) + lazy_result = column.transform(lambda cell: cell.dt.to_string(format=format_)) + with pytest.raises(LazyComputationError): + lazy_result.get_value(0) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py new file mode 100644 index 000000000..2bab01580 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_unix_timestamp.py @@ -0,0 +1,39 @@ +from datetime import UTC, datetime +from typing import Literal + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "unit", "expected"), + [ + (datetime(1970, 1, 1, tzinfo=UTC), "s", 0), + (datetime(1969, 12, 31, tzinfo=UTC), "s", -86400), + (datetime(1970, 1, 2, tzinfo=UTC), "s", 86400), + (datetime(1970, 1, 2, tzinfo=UTC), "ms", 86400000), + (datetime(1970, 1, 2, tzinfo=UTC), "us", 86400000000), + (None, "s", None), + ], + ids=[ + "epoch", + "one 
day before epoch", + "one day after epoch (seconds)", + "one day after epoch (milliseconds)", + "one day after epoch (microseconds)", + "None", + ], +) +def test_should_return_unix_timestamp( + value: datetime | None, + unit: Literal["s", "ms", "us"], + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.unix_timestamp(unit=unit), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py new file mode 100644 index 000000000..bee495dec --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_week.py @@ -0,0 +1,115 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + # datetime - first day is Monday + (datetime(2024, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Tuesday + (datetime(2030, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Wednesday + (datetime(2025, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Thursday + (datetime(2026, 1, 1, tzinfo=UTC), 1), + # datetime - first day is Friday + (datetime(2027, 1, 1, tzinfo=UTC), 53), + # datetime - first day is Saturday + (datetime(2028, 1, 1, tzinfo=UTC), 52), + # datetime - first day is Sunday + (datetime(2023, 1, 1, tzinfo=UTC), 52), + # datetime - last day is Monday + (datetime(2029, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Tuesday + (datetime(2024, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Wednesday + (datetime(2025, 12, 31, tzinfo=UTC), 1), + # datetime - last day is Thursday + (datetime(2026, 12, 31, tzinfo=UTC), 53), + # datetime - last day is Friday + (datetime(2027, 12, 31, tzinfo=UTC), 52), + # datetime - last day is Saturday + (datetime(2022, 12, 31, tzinfo=UTC), 52), + # 
datetime - last day is Sunday + (datetime(2023, 12, 31, tzinfo=UTC), 52), + # date - first day is Monday + (date(2024, 1, 1), 1), + # date - first day is Tuesday + (date(2030, 1, 1), 1), + # date - first day is Wednesday + (date(2025, 1, 1), 1), + # date - first day is Thursday + (date(2026, 1, 1), 1), + # date - first day is Friday + (date(2027, 1, 1), 53), + # date - first day is Saturday + (date(2028, 1, 1), 52), + # date - first day is Sunday + (date(2023, 1, 1), 52), + # date - last day is Monday + (date(2029, 12, 31), 1), + # date - last day is Tuesday + (date(2024, 12, 31), 1), + # date - last day is Wednesday + (date(2025, 12, 31), 1), + # date - last day is Thursday + (date(2026, 12, 31), 53), + # date - last day is Friday + (date(2027, 12, 31), 52), + # date - last day is Saturday + (date(2022, 12, 31), 52), + # date - last day is Sunday + (date(2023, 12, 31), 52), + # None + (None, None), + ], + ids=[ + # datetime + "datetime - first day is Monday", + "datetime - first day is Tuesday", + "datetime - first day is Wednesday", + "datetime - first day is Thursday", + "datetime - first day is Friday", + "datetime - first day is Saturday", + "datetime - first day is Sunday", + "datetime - last day is Monday", + "datetime - last day is Tuesday", + "datetime - last day is Wednesday", + "datetime - last day is Thursday", + "datetime - last day is Friday", + "datetime - last day is Saturday", + "datetime - last day is Sunday", + # date + "date - first day is Monday", + "date - first day is Tuesday", + "date - first day is Wednesday", + "date - first day is Thursday", + "date - first day is Friday", + "date - first day is Saturday", + "date - first day is Sunday", + "date - last day is Monday", + "date - last day is Tuesday", + "date - last day is Wednesday", + "date - last day is Thursday", + "date - last day is Friday", + "date - last day is Saturday", + "date - last day is Sunday", + # None + "None", + ], +) +def test_should_extract_week( + value: datetime | 
date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.week(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py new file mode 100644 index 000000000..b7d38395b --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_datetime_operations/test_year.py @@ -0,0 +1,31 @@ +from datetime import UTC, date, datetime + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (datetime(1, 2, 3, tzinfo=UTC), 1), + (date(1, 2, 3), 1), + (None, None), + ], + ids=[ + "datetime", + "date", + "None", + ], +) +def test_should_extract_year( + value: datetime | date | None, + expected: int | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dt.year(), + expected, + type_if_none=ColumnType.datetime(), + ) diff --git a/tests/safeds/data/tabular/containers/_lazy_temporal_cell/__init__.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/__init__.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_temporal_cell/__init__.py rename to tests/safeds/data/tabular/query/_lazy_duration_operations/__init__.py diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..99b9cb6b3 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[duration] + 
2005674043565732975 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py new file mode 100644 index 000000000..7c1f5b023 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_abs.py @@ -0,0 +1,31 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(days=1), timedelta(days=1)), + (timedelta(days=1, hours=12), timedelta(days=1, hours=12)), + (timedelta(days=-1), timedelta(days=1)), + (timedelta(days=-1, hours=-12), timedelta(days=1, hours=12)), + (timedelta(days=1, hours=-12), timedelta(hours=12)), + (timedelta(days=-1, hours=12), timedelta(hours=12)), + (None, None), + ], + ids=[ + "positive days", + "positive days and hours", + "negative days", + "negative days and hours", + "positive days, negative hours", + "negative days, positive hours", + "None", + ], +) +def test_should_return_absolute_duration(value: timedelta | None, expected: timedelta | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.abs(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py new file mode 100644 index 000000000..c7810b321 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (duration) + ( + Cell.duration(hours=1).dur, + 
Cell.duration(hours=1).dur, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("a")).dur, + True, + ), + # not equal (different durations) + ( + Cell.duration(hours=1).dur, + Cell.duration(hours=2).dur, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("b")).dur, + False, + ), + # not equal (different cell kinds) + ( + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + False, + ), + ], + ids=[ + # Equal + "equal (duration)", + "equal (column)", + # Not equal + "not equal (different durations)", + "not equal (different columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: DurationOperations, + ops_2: DurationOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: DurationOperations) -> None: + assert (ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.duration(hours=1).dur, None), + (Cell.duration(hours=1).dur, Column("col1", [1])), + ], + ids=[ + "DurationOperations vs. None", + "DurationOperations vs. 
Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: DurationOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py new file mode 100644 index 000000000..facdf0214 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_days.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(days=1), 1), + (timedelta(days=1, hours=12), 1), + (timedelta(days=-1), -1), + (timedelta(days=-1, hours=-12), -1), + (timedelta(days=1, hours=-12), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_days(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_days(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py new file mode 100644 index 000000000..dc01ec4b8 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_hours.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(hours=1), 1), + (timedelta(hours=1, minutes=30), 1), + (timedelta(hours=-1), -1), + (timedelta(hours=-1, minutes=-30), -1), + (timedelta(hours=1, minutes=-30), 0), + (None, None), + ], + ids=[ + 
"positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_hours(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_hours(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py new file mode 100644 index 000000000..e1bcd6271 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_microseconds.py @@ -0,0 +1,30 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(microseconds=1), 1), + (timedelta(microseconds=-1), -1), + (timedelta(milliseconds=1, microseconds=-500), 500), + (None, None), + ], + ids=[ + "positive, exact", + "negative, exact", + "mixed", + "None", + ], +) +def test_should_return_full_microseconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_microseconds(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py new file mode 100644 index 000000000..5bcd0d58e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_milliseconds.py @@ -0,0 +1,34 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(milliseconds=1), 1), + (timedelta(milliseconds=1, 
microseconds=500), 1), + (timedelta(milliseconds=-1), -1), + (timedelta(milliseconds=-1, microseconds=-500), -1), + (timedelta(milliseconds=1, microseconds=-500), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_milliseconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_milliseconds(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py new file mode 100644 index 000000000..21e962307 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_minutes.py @@ -0,0 +1,34 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(minutes=1), 1), + (timedelta(minutes=1, seconds=30), 1), + (timedelta(minutes=-1), -1), + (timedelta(minutes=-1, seconds=-30), -1), + (timedelta(minutes=1, seconds=-30), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_minutes(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_minutes(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py new file mode 100644 index 000000000..7d1611a59 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_seconds.py @@ -0,0 +1,34 
@@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(seconds=1), 1), + (timedelta(seconds=1, milliseconds=500), 1), + (timedelta(seconds=-1), -1), + (timedelta(seconds=-1, milliseconds=-500), -1), + (timedelta(seconds=1, milliseconds=-500), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_seconds(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.full_seconds(), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py new file mode 100644 index 000000000..f2f2a3ebd --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_full_weeks.py @@ -0,0 +1,29 @@ +from datetime import timedelta + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (timedelta(weeks=1), 1), + (timedelta(weeks=1, days=3), 1), + (timedelta(weeks=-1), -1), + (timedelta(weeks=-1, days=-3), -1), + (timedelta(weeks=1, days=-3), 0), + (None, None), + ], + ids=[ + "positive, exact", + "positive, rounded", + "negative, exact", + "negative, rounded", + "mixed", + "None", + ], +) +def test_should_return_full_weeks(value: timedelta | None, expected: int | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.dur.full_weeks(), expected, type_if_none=ColumnType.duration()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py 
b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py new file mode 100644 index 000000000..baf9a9e6e --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.duration(hours=1).dur, + lambda: _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], DurationOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], DurationOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different durations + ( + Cell.duration(hours=1).dur, + Cell.duration(hours=2).dur, + ), + # different columns + ( + _LazyCell(pl.col("a")).dur, + _LazyCell(pl.col("b")).dur, + ), + # different cell kinds + ( + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ), + ], + ids=[ + "different durations", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: DurationOperations, ops_2: DurationOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py new file mode 100644 index 000000000..ec3bbb518 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_repr.py @@ -0,0 +1,22 @@ 
+import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).dur, + '_LazyDurationOperations(col("a"))', + ), + ], + ids=[ + "column", + ], +) +def test_should_return_a_string_representation(ops: DurationOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py new file mode 100644 index 000000000..512ef6a66 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.duration(hours=1).dur, + _LazyCell(pl.col("a")).dur, + ], + ids=[ + "duration", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: DurationOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py new file mode 100644 index 000000000..0cb235d11 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_str.py @@ -0,0 +1,22 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import DurationOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).dur, + '(col("a")).dur', + ), + ], + ids=[ + "column", + ], +) +def 
test_should_return_a_string_representation(ops: DurationOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py new file mode 100644 index 000000000..f6ac84a1d --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_duration_operations/test_to_string.py @@ -0,0 +1,127 @@ +from datetime import timedelta +from typing import Literal + +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "format_", "expected"), + [ + # ISO 8601 format + (timedelta(weeks=1), "iso", "P7D"), + (timedelta(weeks=-1), "iso", "-P7D"), + (timedelta(days=1), "iso", "P1D"), + (timedelta(days=-1), "iso", "-P1D"), + (timedelta(hours=1), "iso", "PT1H"), + (timedelta(hours=-1), "iso", "-PT1H"), + (timedelta(minutes=1), "iso", "PT1M"), + (timedelta(minutes=-1), "iso", "-PT1M"), + (timedelta(seconds=1), "iso", "PT1S"), + (timedelta(seconds=-1), "iso", "-PT1S"), + (timedelta(milliseconds=1), "iso", "PT0.001S"), + (timedelta(milliseconds=-1), "iso", "-PT0.001S"), + (timedelta(microseconds=1), "iso", "PT0.000001S"), + (timedelta(microseconds=-1), "iso", "-PT0.000001S"), + ( + timedelta(weeks=1, days=1, hours=1, minutes=1, seconds=1, milliseconds=1, microseconds=1), + "iso", + "P8DT1H1M1.001001S", + ), + ( + timedelta(weeks=-1, days=-1, hours=-1, minutes=-1, seconds=-1, milliseconds=-1, microseconds=-1), + "iso", + "-P8DT1H1M1.001001S", + ), + ( + timedelta(weeks=1, days=-1, hours=1, minutes=-1, seconds=1, milliseconds=-1, microseconds=1), + "iso", + "P6DT59M0.999001S", + ), + (None, "iso", None), + # Pretty format + (timedelta(weeks=1), "pretty", "7d"), + (timedelta(weeks=-1), "pretty", "-7d"), + (timedelta(days=1), "pretty", "1d"), + 
(timedelta(days=-1), "pretty", "-1d"), + (timedelta(hours=1), "pretty", "1h"), + (timedelta(hours=-1), "pretty", "-1h"), + (timedelta(minutes=1), "pretty", "1m"), + (timedelta(minutes=-1), "pretty", "-1m"), + (timedelta(seconds=1), "pretty", "1s"), + (timedelta(seconds=-1), "pretty", "-1s"), + (timedelta(milliseconds=1), "pretty", "1ms"), + (timedelta(milliseconds=-1), "pretty", "-1ms"), + (timedelta(microseconds=1), "pretty", "1µs"), + (timedelta(microseconds=-1), "pretty", "-1µs"), + ( + timedelta(weeks=1, days=1, hours=1, minutes=1, seconds=1, milliseconds=1, microseconds=1), + "pretty", + "8d 1h 1m 1s 1001µs", + ), + ( + timedelta(weeks=-1, days=-1, hours=-1, minutes=-1, seconds=-1, milliseconds=-1, microseconds=-1), + "pretty", + "-8d -1h -1m -1s -1001µs", + ), + ( + timedelta(weeks=1, days=-1, hours=1, minutes=-1, seconds=1, milliseconds=-1, microseconds=1), + "pretty", + "6d 59m 999001µs", + ), + (None, "pretty", None), + ], + ids=[ + # ISO 8601 format + "iso - positive weeks", + "iso - negative weeks", + "iso - positive days", + "iso - negative days", + "iso - positive hours", + "iso - negative hours", + "iso - positive minutes", + "iso - negative minutes", + "iso - positive seconds", + "iso - negative seconds", + "iso - positive milliseconds", + "iso - negative milliseconds", + "iso - positive microseconds", + "iso - negative microseconds", + "iso - all positive", + "iso - all negative", + "iso - mixed", + "iso - None", + # Pretty format + "pretty - positive weeks", + "pretty - negative weeks", + "pretty - positive days", + "pretty - negative days", + "pretty - positive hours", + "pretty - negative hours", + "pretty - positive minutes", + "pretty - negative minutes", + "pretty - positive seconds", + "pretty - negative seconds", + "pretty - positive milliseconds", + "pretty - negative milliseconds", + "pretty - positive microseconds", + "pretty - negative microseconds", + "pretty - all positive", + "pretty - all negative", + "pretty - mixed", + "pretty - 
None", + ], +) +def test_should_return_string_representation( + value: timedelta | None, + format_: Literal["iso", "pretty"], + expected: str | None, +) -> None: + assert_cell_operation_works( + value, + lambda cell: cell.dur.to_string(format=format_), + expected, + type_if_none=ColumnType.duration(), + ) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/__init__.py b/tests/safeds/data/tabular/query/_lazy_math_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..87706531c --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[constant] + 4610312201483200147 +# --- diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py new file mode 100644 index 000000000..d7d119d26 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_abs.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0.0), + (10, 10), + (10.5, 10.5), + (-10, 10), + (-10.5, 10.5), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_absolute_value(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.abs(), expected, 
type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py new file mode 100644 index 000000000..814c81d6a --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_ceil.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 11), + (-10, -10), + (-10.5, -10), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_ceiling(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.ceil(), expected, type_if_none=ColumnType.float64()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py new file mode 100644 index 000000000..f75bbe902 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (constant) + ( + Cell.constant(1).math, + Cell.constant(1).math, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("a")).math, + True, + ), + # not equal (different constant) + ( + Cell.constant(1).math, + Cell.constant(2).math, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("b")).math, + False, + ), + # not equal (different cell kinds) + ( + 
Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + False, + ), + ], + ids=[ + # Equal + "equal (constant)", + "equal (column)", + # Not equal + "not equal (different constants)", + "not equal (different columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: MathOperations, + ops_2: MathOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: MathOperations) -> None: + assert (ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.constant(1).math, None), + (Cell.constant(1).math, Column("col1", [1])), + ], + ids=[ + "MathOperations vs. None", + "MathOperations vs. Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: MathOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py new file mode 100644 index 000000000..d773fe90a --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_floor.py @@ -0,0 +1,29 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType +from tests.helpers import assert_cell_operation_works + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (0, 0), + (0.0, 0), + (10, 10), + (10.5, 10), + (-10, -10), + (-10.5, -11), + (None, None), + ], + ids=[ + "zero int", + "zero float", + "positive int", + "positive float", + "negative int", + "negative float", + "None", + ], +) +def test_should_return_floor(value: float | None, expected: float | None) -> None: + assert_cell_operation_works(value, lambda cell: cell.math.floor(), expected, type_if_none=ColumnType.float64()) diff
--git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py new file mode 100644 index 000000000..4e7f63514 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.constant(1).math, + lambda: _LazyCell(pl.col("a")).math, + ], + ids=[ + "constant", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], MathOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], MathOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different constant values + ( + Cell.constant(1).math, + Cell.constant(2).math, + ), + # different columns + ( + _LazyCell(pl.col("a")).math, + _LazyCell(pl.col("b")).math, + ), + # different cell kinds + ( + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ), + ], + ids=[ + "different constant values", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: MathOperations, ops_2: MathOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py new file mode 100644 index 000000000..b3657bc63 --- /dev/null +++ 
b/tests/safeds/data/tabular/query/_lazy_math_operations/test_repr.py @@ -0,0 +1,22 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).math, + '_LazyMathOperations(col("a"))', + ), + ], + ids=[ + "column", + ], +) +def test_should_return_a_string_representation(ops: MathOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py new file mode 100644 index 000000000..25f6c0a94 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant(1).math, + _LazyCell(pl.col("a")).math, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: MathOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py new file mode 100644 index 000000000..6e40ae607 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_math_operations/test_str.py @@ -0,0 +1,22 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import MathOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + _LazyCell(pl.col("a")).math, + '(col("a")).math', + ), + 
], + ids=[ + "column", + ], +) +def test_should_return_a_string_representation(ops: MathOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/__init__.py b/tests/safeds/data/tabular/query/_lazy_string_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..cdae41257 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/__snapshots__/test_hash.ambr @@ -0,0 +1,7 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[column] + 8162512882156938440 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[constant] + 6067426592045063520 +# --- diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_contains.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_contains.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_contains.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_ends_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_ends_with.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_ends_with.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py new file mode 100644 index 000000000..4d6ce2779 --- /dev/null +++ 
b/tests/safeds/data/tabular/query/_lazy_string_operations/test_eq.py @@ -0,0 +1,90 @@ +from typing import Any + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell, Column +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + ("ops_1", "ops_2", "expected"), + [ + # equal (constant) + ( + Cell.constant("a").str, + Cell.constant("a").str, + True, + ), + # equal (column) + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("a")).str, + True, + ), + # not equal (different constant values) + ( + Cell.constant("a").str, + Cell.constant("b").str, + False, + ), + # not equal (different columns) + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("b")).str, + False, + ), + # not equal (different cell kinds) + ( + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + False, + ), + ], + ids=[ + # Equal + "equal (constant)", + "equal (column)", + # Not equal + "not equal (different constant values)", + "not equal (different columns)", + "not equal (different cell kinds)", + ], +) +def test_should_return_whether_objects_are_equal( + ops_1: StringOperations, + ops_2: StringOperations, + expected: bool, +) -> None: + assert (ops_1.__eq__(ops_2)) == expected + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_true_if_objects_are_identical(ops: StringOperations) -> None: + assert (ops.__eq__(ops)) is True + + +@pytest.mark.parametrize( + ("ops", "other"), + [ + (Cell.constant("a").str, None), + (Cell.constant("a").str, Column("col1", [1])), + ], + ids=[ + "StringOperations vs. None", + "StringOperations vs. 
Column", + ], +) +def test_should_return_not_implemented_if_other_has_different_type(ops: StringOperations, other: Any) -> None: + assert (ops.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py new file mode 100644 index 000000000..66de2a7bf --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_hash.py @@ -0,0 +1,64 @@ +from collections.abc import Callable + +import polars as pl +import pytest +from syrupy import SnapshotAssertion + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + "ops_factory", + [ + lambda: Cell.constant("a").str, + lambda: _LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, ops_factory: Callable[[], StringOperations]) -> None: + ops_1 = ops_factory() + ops_2 = ops_factory() + assert hash(ops_1) == hash(ops_2) + + def test_should_return_same_hash_in_different_processes( + self, + ops_factory: Callable[[], StringOperations], + snapshot: SnapshotAssertion, + ) -> None: + ops = ops_factory() + assert hash(ops) == snapshot + + +@pytest.mark.parametrize( + ("ops_1", "ops_2"), + [ + # different constant values + ( + Cell.constant("a").str, + Cell.constant("b").str, + ), + # different columns + ( + _LazyCell(pl.col("a")).str, + _LazyCell(pl.col("b")).str, + ), + # different cell kinds + ( + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + ), + ], + ids=[ + "different constant values", + "different columns", + "different cell kinds", + ], +) +def test_should_be_good_hash(ops_1: StringOperations, ops_2: StringOperations) -> None: + assert hash(ops_1) != hash(ops_2) diff --git 
a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_index_of.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_index_of.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_index_of.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_length.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_length.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_length.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_replace.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_replace.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_replace.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py new file mode 100644 index 000000000..757dd1aa1 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_repr.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.constant("a").str, + "_LazyStringOperations(String(a))", + ), + ( + _LazyCell(pl.col("a")).str, + '_LazyStringOperations(col("a"))', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(ops: StringOperations, expected: str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert repr(ops) == 
expected diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py new file mode 100644 index 000000000..8293e5f0b --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_sizeof.py @@ -0,0 +1,23 @@ +import sys + +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + "ops", + [ + Cell.constant("a").str, + _LazyCell(pl.col("a")).str, + ], + ids=[ + "constant", + "column", + ], +) +def test_should_be_larger_than_normal_object(ops: StringOperations) -> None: + assert sys.getsizeof(ops) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_starts_with.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_starts_with.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_starts_with.py diff --git a/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py new file mode 100644 index 000000000..b23027814 --- /dev/null +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_str.py @@ -0,0 +1,28 @@ +import polars as pl +import pytest + +from safeds.data.tabular.containers import Cell +from safeds.data.tabular.containers._lazy_cell import _LazyCell +from safeds.data.tabular.query import StringOperations + + +@pytest.mark.parametrize( + ("ops", "expected"), + [ + ( + Cell.constant("a").str, + "(String(a)).str", + ), + ( + _LazyCell(pl.col("a")).str, + '(col("a")).str', + ), + ], + ids=[ + "constant", + "column", + ], +) +def test_should_return_a_string_representation(ops: StringOperations, expected: 
str) -> None: + # We do not care about the exact string representation, this is only for debugging + assert str(ops) == expected diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_substring.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py similarity index 95% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_substring.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py index ab2496486..8d1164a38 100644 --- a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_substring.py +++ b/tests/safeds/data/tabular/query/_lazy_string_operations/test_substring.py @@ -28,7 +28,7 @@ ], ) def test_should_return_substring(string: str, start: int, length: int | None, expected: str) -> None: - assert_cell_operation_works(string, lambda cell: cell.str.substring(start, length), expected) + assert_cell_operation_works(string, lambda cell: cell.str.substring(start=start, length=length), expected) def test_should_raise_if_length_is_negative() -> None: diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_date.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_date.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_date.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_datetime.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_datetime.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_datetime.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_int.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py similarity index 100% rename from 
tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_int.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_int.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_lowercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_lowercase.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_lowercase.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_uppercase.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_to_uppercase.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_to_uppercase.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_end.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_end.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_end.py diff --git a/tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_start.py b/tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py similarity index 100% rename from tests/safeds/data/tabular/containers/_lazy_string_cell/test_trim_start.py rename to tests/safeds/data/tabular/query/_lazy_string_operations/test_trim_start.py diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py 
b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 55a2fcfb1..1853bb291 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -8,7 +8,7 @@ class TestInit: def test_should_raise_value_error(self) -> None: with pytest.raises(ValueError, match='Parameter "max_" must be greater than parameter "min_".'): - _ = RangeScaler(min_=10, max_=0) + _ = RangeScaler(min=10, max=0) class TestFit: @@ -187,8 +187,8 @@ def test_should_return_fitted_transformer_and_transformed_table_with_correct_ran expected: Table, ) -> None: fitted_transformer, transformed_table = RangeScaler( - min_=-10.0, - max_=10.0, + min=-10.0, + max=10.0, selector=column_names, ).fit_and_transform( table, diff --git a/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py new file mode 100644 index 000000000..002a19df9 --- /dev/null +++ b/tests/safeds/data/tabular/typing/_polars_column_type/test_datetime.py @@ -0,0 +1,8 @@ +import pytest + +from safeds.data.tabular.typing import ColumnType + + +def test_should_raise_if_time_zone_is_invalid() -> None: + with pytest.raises(ValueError, match="Invalid time zone"): + ColumnType.datetime(time_zone="invalid")