Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ class Table:
| [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. |
| [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. |
| [from_rows][safeds.data.tabular.containers._table.Table.from_rows] | Create a table from a list of rows. |

Parameters
----------
data : Mapping[str, Sequence[Any]] | None
The data. If None, an empty table is created.

Raises
------
ColumnLengthMismatchError
If columns have different lengths.

Examples
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]})
"""

# ------------------------------------------------------------------------------------------------------------------
Expand All @@ -62,8 +77,6 @@ def from_csv_file(path: str | Path) -> Table:
"""
Read data from a CSV file into a table.

This table is not modified.

Parameters
----------
path : str | Path
Expand Down Expand Up @@ -91,8 +104,6 @@ def from_excel_file(path: str | Path) -> Table:
"""
Read data from an Excel file into a table.

This table is not modified.

Parameters
----------
path : str | Path
Expand Down Expand Up @@ -122,8 +133,6 @@ def from_json_file(path: str | Path) -> Table:
"""
Read data from a JSON file into a table.

This table is not modified.

Parameters
----------
path : str | Path
Expand Down Expand Up @@ -151,8 +160,6 @@ def from_dict(data: dict[str, list[Any]]) -> Table:
"""
Create a table from a dictionary that maps column names to column values.

This table is not modified.

Parameters
----------
data : dict[str, list[Any]]
Expand All @@ -175,8 +182,6 @@ def from_columns(columns: list[Column]) -> Table:
"""
Return a table created from a list of columns.

This table is not modified.

Parameters
----------
columns : list[Column]
Expand Down Expand Up @@ -208,8 +213,6 @@ def from_rows(rows: list[Row]) -> Table:
"""
Return a table created from a list of rows.

This table is not modified.

Parameters
----------
rows : list[Row]
Expand Down Expand Up @@ -245,8 +248,6 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) ->
"""
Create a table from a `pandas.DataFrame`.

This table is not modified.

Parameters
----------
data : pd.DataFrame
Expand Down Expand Up @@ -1070,7 +1071,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
----------
target_name : str
Name of the target column.
feature_names : Optional[list[str]]
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.

Returns
Expand All @@ -1080,7 +1081,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
"""
from ._tagged_table import TaggedTable

return TaggedTable(self._data, self._schema, target_name, feature_names)
return TaggedTable._from_table(self, target_name, feature_names)

def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table:
"""
Expand Down
125 changes: 114 additions & 11 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pandas as pd
from __future__ import annotations

from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Table
from safeds.data.tabular.typing import Schema

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
from typing import Any


class TaggedTable(Table):
Expand All @@ -10,30 +15,128 @@ class TaggedTable(Table):

Parameters
----------
data : Iterable
data : Mapping[str, Sequence[Any]]
The data.
target_name : str
Name of the target column.
feature_names : Optional[list[str]]
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.
schema : Optional[Schema]
The schema of the table. If not specified, the schema will be inferred from the data.

Raises
------
ColumnLengthMismatchError
If columns have different lengths.
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.

Examples
--------
>>> from safeds.data.tabular.containers import Table, TaggedTable
>>> table = Table({"col1": ["a", "b"], "col2": [1, 2]})
>>> tagged_table = table.tag_columns("col2", ["col1"])
"""

# ------------------------------------------------------------------------------------------------------------------
# Creation
# ------------------------------------------------------------------------------------------------------------------

@staticmethod
def _from_table(
    table: Table,
    target_name: str,
    feature_names: list[str] | None = None,
) -> TaggedTable:
    """
    Create a tagged table from a table.

    The result reuses the data and schema objects of `table`; nothing is copied here.

    Parameters
    ----------
    table : Table
        The table.
    target_name : str
        Name of the target column.
    feature_names : list[str] | None
        Names of the feature columns. If None, all columns except the target column are used.

    Returns
    -------
    tagged_table : TaggedTable
        The created table.

    Raises
    ------
    ValueError
        If the target column is also a feature column.
    ValueError
        If no feature columns are specified.
    UnknownColumnNameError
        If the target column or one of the feature columns does not exist in the table.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table, TaggedTable
    >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]})
    >>> tagged_table = TaggedTable._from_table(table, "col2", ["col1"])
    """
    # If no feature names are specified, use all columns except the target column.
    # A fresh list is built instead of calling `remove` on whatever list
    # `table.column_names` hands back, so the source table can never be altered
    # through an aliased list.
    if feature_names is None:
        feature_names = [name for name in table.column_names if name != target_name]

    # Validate inputs
    if target_name in feature_names:
        raise ValueError(f"Column '{target_name}' cannot be both feature and target.")
    if not feature_names:
        raise ValueError("At least one feature column must be specified.")

    # Create result. `__init__` expects a mapping of column names to values, so it is
    # bypassed and the instance attributes are populated directly from the table.
    result = object.__new__(TaggedTable)

    result._data = table._data
    result._schema = table.schema
    # Both lookups below operate on the freshly populated instance; they are also
    # where unknown feature/target names are expected to be rejected.
    result._features = result.keep_only_columns(feature_names)
    result._target = result.get_column(target_name)

    return result

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

# noinspection PyMissingConstructor
def __init__(
self,
data: pd.DataFrame,
schema: Schema,
data: Mapping[str, Sequence[Any]],
target_name: str,
feature_names: list[str] | None = None,
):
self._data = data
self._schema = schema
"""
Create a tagged table from a mapping of column names to their values.

Parameters
----------
data : Mapping[str, Sequence[Any]]
The data.
target_name : str
Name of the target column.
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.

Raises
------
ColumnLengthMismatchError
If columns have different lengths.
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.

Examples
--------
>>> from safeds.data.tabular.containers import TaggedTable
>>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"])
"""
super().__init__(data)

# If no feature names are specified, use all columns except the target column
if feature_names is None:
Expand Down
59 changes: 43 additions & 16 deletions tests/safeds/data/tabular/containers/test_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,71 @@


@pytest.fixture()
def table() -> Table:
return Table(
{
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
},
)
def data() -> dict[str, list[int]]:
return {
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
}


@pytest.fixture()
def table(data: dict[str, list[int]]) -> Table:
    """Provide a `Table` built from the column mapping supplied by the `data` fixture."""
    plain_table = Table(data)
    return plain_table


@pytest.fixture()
def tagged_table(table: Table) -> TaggedTable:
    """Provide the `table` fixture tagged with "T" as target; features are left implicit."""
    tagged = table.tag_columns(target_name="T")
    return tagged


class TestInit:
class TestFromTable:
def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None:
with pytest.raises(UnknownColumnNameError):
table.tag_columns(target_name="T", feature_names=["A", "B", "C", "D"])
TaggedTable._from_table(table, target_name="T", feature_names=["A", "B", "C", "D"])

def test_should_raise_if_target_does_not_exist(self, table: Table) -> None:
with pytest.raises(UnknownColumnNameError):
table.tag_columns(target_name="D")
TaggedTable._from_table(table, target_name="D")

def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None:
with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."):
table.tag_columns(target_name="A", feature_names=["A", "B", "C"])
TaggedTable._from_table(table, target_name="A", feature_names=["A", "B", "C"])

def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None:
with pytest.raises(ValueError, match="At least one feature column must be specified."):
table.tag_columns(target_name="A", feature_names=[])
TaggedTable._from_table(table, target_name="A", feature_names=[])

def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None:
def test_should_raise_if_features_are_empty_implicitly(self) -> None:
table = Table({"A": [1, 4]})

with pytest.raises(ValueError, match="At least one feature column must be specified."):
table.tag_columns(target_name="A")
TaggedTable._from_table(table, target_name="A")


class TestInit:
    """Input validation performed when constructing a `TaggedTable` directly from a mapping."""

    def test_should_raise_if_a_feature_does_not_exist(self, data: dict[str, list[int]]) -> None:
        # "D" is not one of the keys provided by the `data` fixture.
        requested_features = ["A", "B", "C", "D"]
        with pytest.raises(UnknownColumnNameError):
            TaggedTable(data, target_name="T", feature_names=requested_features)

    def test_should_raise_if_target_does_not_exist(self, data: dict[str, list[int]]) -> None:
        # The target "D" is not one of the keys provided by the `data` fixture.
        missing_target = "D"
        with pytest.raises(UnknownColumnNameError):
            TaggedTable(data, target_name=missing_target)

    def test_should_raise_if_features_and_target_overlap(self, data: dict[str, list[int]]) -> None:
        # "A" is requested both as the target and as a feature.
        overlapping_features = ["A", "B", "C"]
        with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."):
            TaggedTable(data, target_name="A", feature_names=overlapping_features)

    def test_should_raise_if_features_are_empty_explicitly(self, data: dict[str, list[int]]) -> None:
        # An explicitly empty feature list must be rejected.
        no_features: list[str] = []
        with pytest.raises(ValueError, match="At least one feature column must be specified."):
            TaggedTable(data, target_name="A", feature_names=no_features)

    def test_should_raise_if_features_are_empty_implicitly(self) -> None:
        # With a single column used as target, no column is left over to act as a feature.
        single_column = {"A": [1, 4]}

        with pytest.raises(ValueError, match="At least one feature column must be specified."):
            TaggedTable(single_column, target_name="A")


class TestFeatures:
Expand Down