From d95c79571b2dfd055330274b3de093ac474cd4ac Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 7 May 2023 21:34:55 +0200 Subject: [PATCH 1/4] feat: create `TaggedTable` from `Table` --- src/safeds/data/tabular/containers/_table.py | 14 ---- .../data/tabular/containers/_tagged_table.py | 64 ++++++++++++++++++- .../tabular/containers/test_tagged_table.py | 24 +++++++ 3 files changed, 86 insertions(+), 16 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 59c4ad01b..307c6a780 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -62,8 +62,6 @@ def from_csv_file(path: str | Path) -> Table: """ Read data from a CSV file into a table. - This table is not modified. - Parameters ---------- path : str | Path @@ -91,8 +89,6 @@ def from_excel_file(path: str | Path) -> Table: """ Read data from an Excel file into a table. - This table is not modified. - Parameters ---------- path : str | Path @@ -122,8 +118,6 @@ def from_json_file(path: str | Path) -> Table: """ Read data from a JSON file into a table. - This table is not modified. - Parameters ---------- path : str | Path @@ -151,8 +145,6 @@ def from_dict(data: dict[str, list[Any]]) -> Table: """ Create a table from a dictionary that maps column names to column values. - This table is not modified. - Parameters ---------- data : dict[str, list[Any]] @@ -175,8 +167,6 @@ def from_columns(columns: list[Column]) -> Table: """ Return a table created from a list of columns. - This table is not modified. - Parameters ---------- columns : list[Column] @@ -208,8 +198,6 @@ def from_rows(rows: list[Row]) -> Table: """ Return a table created from a list of rows. - This table is not modified. - Parameters ---------- rows : list[Row] @@ -245,8 +233,6 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) -> """ Create a table from a `pandas.DataFrame`. - This table is not modified. - Parameters ---------- data : pd.DataFrame diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 3eb2b0f5d..6412fadad 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -1,7 +1,12 @@ -import pandas as pd +from __future__ import annotations + +from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Table -from safeds.data.tabular.typing import Schema + +if TYPE_CHECKING: + import pandas as pd + from safeds.data.tabular.typing import Schema class TaggedTable(Table): @@ -20,6 +25,61 @@ class TaggedTable(Table): The schema of the table. If not specified, the schema will be inferred from the data. """ + # ------------------------------------------------------------------------------------------------------------------ + # Creation + # ------------------------------------------------------------------------------------------------------------------ + + @staticmethod + def _from_table( + table: Table, + target_name: str, + feature_names: list[str] | None = None, + ) -> TaggedTable: + """ + Create a tagged table from a table. + + Parameters + ---------- + table : Table + The table. + target_name : str + Name of the target column. + feature_names : list[str] | None + Names of the feature columns. If None, all columns except the target column are used. + + Returns + ------- + tagged_table : TaggedTable + The created table. + + Examples + -------- + >>> from safeds.data.tabular.containers import Table, TaggedTable + >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]}) + >>> tagged_table = TaggedTable._from_table(table, "col2", ["col1"]) + """ + # If no feature names are specified, use all columns except the target column + if feature_names is None: + feature_names = table.column_names + if target_name in feature_names: + feature_names.remove(target_name) + + # Validate inputs + if target_name in feature_names: + raise ValueError(f"Column '{target_name}' cannot be both feature and target.") + if len(feature_names) == 0: + raise ValueError("At least one feature column must be specified.") + + # Create result + result = object.__new__(TaggedTable) + + result._data = table._data + result._schema = table.schema + result._features = result.keep_only_columns(feature_names) + result._target = result.get_column(target_name) + + return result + # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/data/tabular/containers/test_tagged_table.py b/tests/safeds/data/tabular/containers/test_tagged_table.py index 307450ccd..e9b710b4a 100644 --- a/tests/safeds/data/tabular/containers/test_tagged_table.py +++ b/tests/safeds/data/tabular/containers/test_tagged_table.py @@ -20,6 +20,30 @@ def tagged_table(table: Table) -> TaggedTable: return table.tag_columns(target_name="T") +class TestFromTable: + def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None: + with pytest.raises(UnknownColumnNameError): + TaggedTable._from_table(table, target_name="T", feature_names=["A", "B", "C", "D"]) + + def test_should_raise_if_target_does_not_exist(self, table: Table) -> None: + with pytest.raises(UnknownColumnNameError): + TaggedTable._from_table(table, target_name="D") + + def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None: + with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."): + TaggedTable._from_table(table, target_name="A", feature_names=["A", "B", "C"]) + + def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None: + with pytest.raises(ValueError, match="At least one feature column must be specified."): + TaggedTable._from_table(table, target_name="A", feature_names=[]) + + def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None: + table = Table({"A": [1, 4]}) + + with pytest.raises(ValueError, match="At least one feature column must be specified."): + TaggedTable._from_table(table, target_name="A") + + class TestInit: def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None: with pytest.raises(UnknownColumnNameError): From 89e5bb996677fd357cd5b41de81a0131ffd1c9a7 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 7 May 2023 21:38:30 +0200 Subject: [PATCH 2/4] refactor: call `_from_table` --- src/safeds/data/tabular/containers/_table.py | 4 ++-- src/safeds/data/tabular/containers/_tagged_table.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 307c6a780..3bd96c4a8 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1056,7 +1056,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None) ---------- target_name : str Name of the target column. - feature_names : Optional[list[str]] + feature_names : list[str] | None Names of the feature columns. If None, all columns except the target column are used. Returns @@ -1066,7 +1066,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None) """ from ._tagged_table import TaggedTable - return TaggedTable(self._data, self._schema, target_name, feature_names) + return TaggedTable._from_table(self, target_name, feature_names) def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table: """ diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 6412fadad..3e4c8f315 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -17,12 +17,12 @@ class TaggedTable(Table): ---------- data : Iterable The data. + schema : Schema | None + The schema of the table. If not specified, the schema will be inferred from the data. target_name : str Name of the target column. - feature_names : Optional[list[str]] + feature_names : list[str] | None Names of the feature columns. If None, all columns except the target column are used. - schema : Optional[Schema] - The schema of the table. If not specified, the schema will be inferred from the data. """ # ------------------------------------------------------------------------------------------------------------------ From 0ea57c09e4bac3419b7f748b112bbeaaabfc8f86 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 7 May 2023 22:03:30 +0200 Subject: [PATCH 3/4] feat: usable constructor for `TaggedTable` --- src/safeds/data/tabular/containers/_table.py | 15 +++++ .../data/tabular/containers/_tagged_table.py | 63 ++++++++++++++++--- .../tabular/containers/test_tagged_table.py | 45 ++++++------- 3 files changed, 92 insertions(+), 31 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 3bd96c4a8..c05d47c1a 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -51,6 +51,21 @@ class Table: | [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. | | [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. | | [from_rows][safeds.data.tabular.containers._table.Table.from_rows] | Create a table from a list of rows. | + + Parameters + ---------- + data : Mapping[str, Sequence[Any]] | None + The data. If None, an empty table is created. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. + + Examples + -------- + >>> from safeds.data.tabular.containers import Table + >>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]}) """ # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 3e4c8f315..457825441 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -5,8 +5,8 @@ from safeds.data.tabular.containers import Column, Table if TYPE_CHECKING: - import pandas as pd - from safeds.data.tabular.typing import Schema + from typing import Any + from collections.abc import Mapping, Sequence class TaggedTable(Table): @@ -15,14 +15,27 @@ class TaggedTable(Table): Parameters ---------- - data : Iterable + data : Mapping[str, Sequence[Any]] The data. - schema : Schema | None - The schema of the table. If not specified, the schema will be inferred from the data. target_name : str Name of the target column. feature_names : list[str] | None Names of the feature columns. If None, all columns except the target column are used. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. + ValueError + If the target column is also a feature column. + ValueError + If no feature columns are specified. + + Examples + -------- + >>> from safeds.data.tabular.containers import Table, TaggedTable + >>> table = Table({"col1": ["a", "b"], "col2": [1, 2]}) + >>> tagged_table = table.tag_columns("col2", ["col1"]) """ # ------------------------------------------------------------------------------------------------------------------ @@ -52,6 +65,13 @@ def _from_table( tagged_table : TaggedTable The created table. + Raises + ------ + ValueError + If the target column is also a feature column. + ValueError + If no feature columns are specified. + Examples -------- >>> from safeds.data.tabular.containers import Table, TaggedTable @@ -84,16 +104,39 @@ def _from_table( # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - # noinspection PyMissingConstructor def __init__( self, - data: pd.DataFrame, - schema: Schema, + data: Mapping[str, Sequence[Any]], target_name: str, feature_names: list[str] | None = None, ): - self._data = data - self._schema = schema + """ + Create a tagged table from a mapping of column names to their values. + + Parameters + ---------- + data : Mapping[str, Sequence[Any]] + The data. + target_name : str + Name of the target column. + feature_names : list[str] | None + Names of the feature columns. If None, all columns except the target column are used. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. + ValueError + If the target column is also a feature column. + ValueError + If no feature columns are specified. + + Examples + -------- + >>> from safeds.data.tabular.containers import TaggedTable + >>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) + """ + super().__init__(data) # If no feature names are specified, use all columns except the target column if feature_names is None: diff --git a/tests/safeds/data/tabular/containers/test_tagged_table.py b/tests/safeds/data/tabular/containers/test_tagged_table.py index e9b710b4a..9a1ae7210 100644 --- a/tests/safeds/data/tabular/containers/test_tagged_table.py +++ b/tests/safeds/data/tabular/containers/test_tagged_table.py @@ -4,15 +4,18 @@ @pytest.fixture() -def table() -> Table: - return Table( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1], - }, - ) +def data() -> dict[str, list[int]]: + return { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + } + + +@pytest.fixture() +def table(data: dict[str, list[int]]) -> Table: + return Table(data) @pytest.fixture() @@ -37,7 +40,7 @@ def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> No with pytest.raises(ValueError, match="At least one feature column must be specified."): TaggedTable._from_table(table, target_name="A", feature_names=[]) - def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None: + def test_should_raise_if_features_are_empty_implicitly(self) -> None: table = Table({"A": [1, 4]}) with pytest.raises(ValueError, match="At least one feature column must be specified."): @@ -45,27 +48,27 @@ def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> No class TestInit: - def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None: + def test_should_raise_if_a_feature_does_not_exist(self, data: dict[str, list[int]]) -> None: with pytest.raises(UnknownColumnNameError): - table.tag_columns(target_name="T", feature_names=["A", "B", "C", "D"]) + TaggedTable(data, target_name="T", feature_names=["A", "B", "C", "D"]) - def test_should_raise_if_target_does_not_exist(self, table: Table) -> None: + def test_should_raise_if_target_does_not_exist(self, data: dict[str, list[int]]) -> None: with pytest.raises(UnknownColumnNameError): - table.tag_columns(target_name="D") + TaggedTable(data, target_name="D") - def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None: + def test_should_raise_if_features_and_target_overlap(self, data: dict[str, list[int]]) -> None: with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."): - table.tag_columns(target_name="A", feature_names=["A", "B", "C"]) + TaggedTable(data, target_name="A", feature_names=["A", "B", "C"]) - def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None: + def test_should_raise_if_features_are_empty_explicitly(self, data: dict[str, list[int]]) -> None: with pytest.raises(ValueError, match="At least one feature column must be specified."): - table.tag_columns(target_name="A", feature_names=[]) + TaggedTable(data, target_name="A", feature_names=[]) - def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None: - table = Table({"A": [1, 4]}) + def test_should_raise_if_features_are_empty_implicitly(self) -> None: + data = {"A": [1, 4]} with pytest.raises(ValueError, match="At least one feature column must be specified."): - table.tag_columns(target_name="A") + TaggedTable(data, target_name="A") class TestFeatures: From dd3663100e76f29ac0af101eff5dd9986cc0ba2e Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Sun, 7 May 2023 20:09:00 +0000 Subject: [PATCH 4/4] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 457825441..d06fa154b 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -5,8 +5,8 @@ from safeds.data.tabular.containers import Column, Table if TYPE_CHECKING: - from typing import Any from collections.abc import Mapping, Sequence + from typing import Any class TaggedTable(Table):