Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions src/safeds/data/tabular/containers/_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ class Cell(ABC, Generic[T_co]):
# ------------------------------------------------------------------------------------------------------------------

@staticmethod
def constant(value: _PythonLiteral | None) -> Cell:
def constant(value: _PythonLiteral | None, *, type: ColumnType | None = None) -> Cell:
"""
Create a cell with a constant value.

Parameters
----------
value:
The value to create the cell from.
type:
The type of the cell. If None, the type is inferred from the value.

Returns
-------
Expand All @@ -77,7 +79,9 @@ def constant(value: _PythonLiteral | None) -> Cell:

from ._lazy_cell import _LazyCell # circular import

return _LazyCell(pl.lit(value))
dtype = type._polars_data_type if type is not None else None

return _LazyCell(pl.lit(value, dtype=dtype))

@staticmethod
def date(
Expand Down Expand Up @@ -1453,7 +1457,7 @@ def cast(self, type: ColumnType) -> Cell:
@property
@abstractmethod
def _polars_expression(self) -> pl.Expr:
"""The Polars expression that corresponds to this cell."""
"""The polars expression that corresponds to this cell."""

@abstractmethod
def _equals(self, other: object) -> bool:
Expand All @@ -1464,10 +1468,32 @@ def _equals(self, other: object) -> bool:
"""


def _to_polars_expression(cell_proxy: _ConvertibleToCell) -> pl.Expr:
def _to_polars_expression(cell_proxy: _ConvertibleToCell, *, type_if_none: ColumnType | None = None) -> pl.Expr:
"""
Convert a cell proxy to a polars expression.

Parameters
----------
cell_proxy:
The cell proxy to convert.
type_if_none:
The type to use if `cell_proxy` is `None`. If `None`, the type is inferred from the context.

Returns
-------
expression:
The polars expression.
"""
import polars as pl

# Cell
if isinstance(cell_proxy, Cell):
return cell_proxy._polars_expression

# Plain value
if cell_proxy is None and type_if_none is not None:
dtype = type_if_none._polars_data_type
else:
return pl.lit(cell_proxy)
dtype = None

return pl.lit(cell_proxy, dtype)
19 changes: 16 additions & 3 deletions src/safeds/data/tabular/query/_duration_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,27 @@
from safeds.data.tabular.containers import Cell


# TODO: Examples with None


class DurationOperations(ABC):
"""
Namespace for operations on durations.

This class cannot be instantiated directly. It can only be accessed using the `dur` attribute of a cell.

Examples
--------
>>> from datetime import timedelta
>>> from safeds.data.tabular.containers import Column
>>> column = Column("a", [timedelta(days=-1), timedelta(days=0), timedelta(days=1)])
>>> column.transform(lambda cell: cell.dur.abs())
+--------------+
| a |
| --- |
| duration[μs] |
+==============+
| 1d |
| 0µs |
| 1d |
+--------------+
"""

# ------------------------------------------------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions src/safeds/data/tabular/query/_lazy_datetime_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from safeds._utils import _structural_hash
from safeds._validation import _convert_and_check_datetime_format
from safeds.data.tabular.containers._cell import _to_polars_expression
from safeds.data.tabular.containers._lazy_cell import _LazyCell

from ._datetime_operations import DatetimeOperations
Expand Down Expand Up @@ -114,6 +115,14 @@ def replace(
second: _ConvertibleToIntCell = None,
microsecond: _ConvertibleToIntCell = None,
) -> Cell:
year = _to_polars_expression(year)
month = _to_polars_expression(month)
day = _to_polars_expression(day)
hour = _to_polars_expression(hour)
minute = _to_polars_expression(minute)
second = _to_polars_expression(second)
microsecond = _to_polars_expression(microsecond)

return _LazyCell(
self._expression.dt.replace(
year=year,
Expand Down
100 changes: 82 additions & 18 deletions src/safeds/data/tabular/query/_lazy_string_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

from safeds._utils import _structural_hash
from safeds._validation import _check_bounds, _ClosedBound, _convert_and_check_datetime_format
from safeds.data.tabular.containers._cell import _to_polars_expression
from safeds.data.tabular.containers._lazy_cell import _LazyCell
from safeds.data.tabular.typing import ColumnType

from ._string_operations import StringOperations

Expand Down Expand Up @@ -49,27 +51,68 @@ def __str__(self) -> str:
# ------------------------------------------------------------------------------------------------------------------

def contains(self, substring: _ConvertibleToStringCell) -> Cell[bool | None]:
substring = _to_polars_expression(substring, type_if_none=ColumnType.string())

return _LazyCell(self._expression.str.contains(substring, literal=True))

def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]:
suffix = _to_polars_expression(suffix)

return _LazyCell(self._expression.str.ends_with(suffix))

def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]:
substring = _to_polars_expression(substring, type_if_none=ColumnType.string())

return _LazyCell(self._expression.str.find(substring, literal=True))

def length(self, optimize_for_ascii: bool = False) -> Cell[int | None]:
if optimize_for_ascii:
return _LazyCell(self._expression.str.len_bytes())
else:
return _LazyCell(self._expression.str.len_chars())

def ends_with(self, suffix: _ConvertibleToStringCell) -> Cell[bool | None]:
return _LazyCell(self._expression.str.ends_with(suffix))
def pad_end(self, length: int, *, character: str = " ") -> Cell[str | None]:
_check_bounds("length", length, lower_bound=_ClosedBound(0))
if len(character) != 1:
raise ValueError("Can only pad with a single character.")

def index_of(self, substring: _ConvertibleToStringCell) -> Cell[int | None]:
return _LazyCell(self._expression.str.find(substring, literal=True))
return _LazyCell(self._expression.str.pad_end(length, character))

def pad_start(self, length: int, *, character: str = " ") -> Cell[str | None]:
_check_bounds("length", length, lower_bound=_ClosedBound(0))
if len(character) != 1:
raise ValueError("Can only pad with a single character.")

return _LazyCell(self._expression.str.pad_start(length, character))

def repeat(self, count: _ConvertibleToIntCell) -> Cell[str | None]:
if isinstance(count, int):
_check_bounds("count", count, lower_bound=_ClosedBound(0))

count = _to_polars_expression(count)

return _LazyCell(self._expression.repeat_by(count).list.join("", ignore_nulls=False))

def remove_prefix(self, prefix: _ConvertibleToStringCell) -> Cell[str | None]:
prefix = _to_polars_expression(prefix, type_if_none=ColumnType.string())

return _LazyCell(self._expression.str.strip_prefix(prefix))

def remove_suffix(self, suffix: _ConvertibleToStringCell) -> Cell[str | None]:
suffix = _to_polars_expression(suffix, type_if_none=ColumnType.string())

return _LazyCell(self._expression.str.strip_suffix(suffix))

def replace_all(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]:
old = _to_polars_expression(old, type_if_none=ColumnType.string())
new = _to_polars_expression(new, type_if_none=ColumnType.string())

def replace(self, old: _ConvertibleToStringCell, new: _ConvertibleToStringCell) -> Cell[str | None]:
return _LazyCell(self._expression.str.replace_all(old, new, literal=True))

def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]:
return _LazyCell(self._expression.str.starts_with(prefix))
def reverse(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.reverse())

def substring(
def slice(
self,
*,
start: _ConvertibleToIntCell = 0,
Expand All @@ -78,8 +121,31 @@ def substring(
if isinstance(length, int):
_check_bounds("length", length, lower_bound=_ClosedBound(0))

start = _to_polars_expression(start)
length = _to_polars_expression(length)

return _LazyCell(self._expression.str.slice(start, length))

def starts_with(self, prefix: _ConvertibleToStringCell) -> Cell[bool | None]:
prefix = _to_polars_expression(prefix)

return _LazyCell(self._expression.str.starts_with(prefix))

def strip(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]:
characters = _to_polars_expression(characters)

return _LazyCell(self._expression.str.strip_chars(characters))

def strip_end(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]:
characters = _to_polars_expression(characters)

return _LazyCell(self._expression.str.strip_chars_end(characters))

def strip_start(self, *, characters: _ConvertibleToStringCell = None) -> Cell[str | None]:
characters = _to_polars_expression(characters)

return _LazyCell(self._expression.str.strip_chars_start(characters))

def to_date(self, *, format: str | None = "iso") -> Cell[datetime.date | None]:
if format == "iso":
format = "%F" # noqa: A001
Expand All @@ -96,28 +162,26 @@ def to_datetime(self, *, format: str | None = "iso") -> Cell[datetime.datetime |

return _LazyCell(self._expression.str.to_datetime(format=format, strict=False))

def to_float(self) -> Cell[float | None]:
import polars as pl

return _LazyCell(self._expression.cast(pl.Float64(), strict=False))

def to_int(self, *, base: _ConvertibleToIntCell = 10) -> Cell[int | None]:
base = _to_polars_expression(base)

return _LazyCell(self._expression.str.to_integer(base=base, strict=False))

def to_lowercase(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.to_lowercase())

def to_time(self, *, format: str | None = "iso") -> Cell[datetime.time | None]:
if format == "iso":
format = "%T" # noqa: A001
format = "%T%.f" # noqa: A001
elif format is not None:
format = _convert_and_check_datetime_format(format, type_="time", used_for_parsing=True) # noqa: A001

return _LazyCell(self._expression.str.to_time(format=format, strict=False))

def to_uppercase(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.to_uppercase())

def trim(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.strip_chars())

def trim_end(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.strip_chars_end())

def trim_start(self) -> Cell[str | None]:
return _LazyCell(self._expression.str.strip_chars_start())
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/query/_math_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def round_to_significant_figures(self, significant_figures: int) -> Cell:
@abstractmethod
def sign(self) -> Cell:
"""
Get the sign (-1 for negative numbers, 0 for zero, and 1 for positive numbers).
Get the sign (-1 if negative, 0 if zero, and 1 if positive).

Note that IEEE 754 defines a negative zero (-0) and a positive zero (+0). This method returns a negative zero
for -0 and a positive zero for +0.
Expand Down
Loading
Loading