From 92d890917bed228c3ec36f81cfa2d283609d5da0 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 21 Jun 2024 17:01:30 +0200 Subject: [PATCH 01/17] first implementation of SequentialTableTransformer --- .../data/tabular/transformation/__init__.py | 3 + .../_sequential_table_transformer.py | 146 ++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/safeds/data/tabular/transformation/_sequential_table_transformer.py diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index b7f19d22e..e974c4b94 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -10,6 +10,7 @@ from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder from ._range_scaler import RangeScaler + from ._sequential_table_transformer import SequentialTableTransformer from ._simple_imputer import SimpleImputer from ._standard_scaler import StandardScaler from ._table_transformer import TableTransformer @@ -22,6 +23,7 @@ "LabelEncoder": "._label_encoder:LabelEncoder", "OneHotEncoder": "._one_hot_encoder:OneHotEncoder", "RangeScaler": "._range_scaler:RangeScaler", + "SequentialTableTransformer": "._sequential_table_transformer:SequentialTableTransformer", "SimpleImputer": "._simple_imputer:SimpleImputer", "StandardScaler": "._standard_scaler:StandardScaler", "TableTransformer": "._table_transformer:TableTransformer", @@ -34,6 +36,7 @@ "LabelEncoder", "OneHotEncoder", "RangeScaler", + "SequentialTableTransformer", "SimpleImputer", "StandardScaler", "TableTransformer", diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py new file mode 100644 index 000000000..69991a006 --- /dev/null +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from typing import Any + +from safeds._utils import _structural_hash +from safeds.data.tabular.containers import Table +from ._table_transformer import TableTransformer +from safeds.exceptions import TransformerNotFittedError, SafeDsError + +from ._invertible_table_transformer import InvertibleTableTransformer + +class SequentialTableTransformer(InvertibleTableTransformer): + """ + The SequentialTableTransforrmer transforms a table using multiple trnasformers in sequence. + + Parameters + ---------- + transformers: + The list of transformers used to transform the table. Used in the order as they are supplied in the list. + + Raises + ------ + ValueError: + Raises a ValueError if the list of Transformers is None or contains no transformers. + """ + + def _init_( + self, + *, + transformers: list[TableTransformer]#, + #column_names: str | list[str] | None = None + ) -> None: + super().__init__(None) + + #Check if transformers actually contains any transformers. + if transformers == None or len(transformers) == 0: + raise ValueError("transformers must contain at least 1 transformer") + + # Parameters + self._transformers: list[TableTransformer] = transformers + + # Internal State + self._is_fitted: bool = False + + def __hash__(self) -> int: + return _structural_hash( + super().__hash__(), + self._transformers, + self._is_fitted + ) + + def fit(self, table: Table) -> SequentialTableTransformer: + """ + Fits all of the transformers in order. + + Parameters + ---------- + table: + The table used to fit the transformers. + + Returns + ------- + The fitted transformer. + + Raises + ------ + ValueError: + Raises a ValueError if the table has no rows. + """ + if table.row_count == 0: + raise ValueError("The SequentialTable cannot be fitted because the table contains 0 rows") + + current_table: Table = table + result: SequentialTableTransformer = SequentialTableTransformer( + transformer=self._transformers, column_names=self._column_names) + + for transformer in result._transformers: + transformer = transformer.fit(current_table) + current_table = transformer.transform(current_table) + + result._is_fitted = True + return result + + def transform(self, table:Table) -> Table: + """ + Transforms the table using all the transformers sequentially. + + Parameters + ---------- + table: + The table to be transformed. + + Returns + ------- + The transformed table. + + Raises + ------ + TransformerNotFittedError: + Raises a TransformerNotFittedError if the transformer isn't fitted. + """ + + if not self._is_fitted: + raise TransformerNotFittedError + + current_table: Table = table + for transformer in self._transformers: + current_table = transformer.transform(current_table) + + return current_table + + def inverse_transform(self, transformed_table:Table) -> Table: + #TODO: Replace SafeDsError with a custom error. + """ + Inversely transforms the table using all the transformers sequentially in inverse order. + + Parameters + ---------- + table: + The table to be transformed back. + + Returns + ------- + The untranformed table. + + Raises + ------ + TransformerNotFittedError: + Raises a TransformerNotFittedError if the transformer isn't fitted. + SafeDsError: + Raises a SafeDsError if one of the transformers isn't invertable. + """ + if not self._is_fitted: + raise TransformerNotFittedError + + #check if transformer is invertable + for transformer in self._transformers: + if not (hasattr(transformer, "inverse_transform") and callable(getattr(transformer, "inverse_transform"))): + raise SafeDsError(str(type(transformer)) + " is not invertable!") + + #sequentially inverse transform the table with all transformers, working from the back of the list forwards. + current_table: Table = transformed_table + for transformer in reversed(self._transformers): + current_table = transformer.inverse_transform(current_table) + + return current_table \ No newline at end of file From 73e765d480311df0c3d9d5ca77355404a6971c9e Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 28 Jun 2024 10:25:14 +0200 Subject: [PATCH 02/17] implemented TransformerNotInvertableError --- .../transformation/_sequential_table_transformer.py | 8 +++----- src/safeds/exceptions/__init__.py | 2 ++ src/safeds/exceptions/_data.py | 7 +++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 69991a006..1153d6f00 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -5,7 +5,7 @@ from safeds._utils import _structural_hash from safeds.data.tabular.containers import Table from ._table_transformer import TableTransformer -from safeds.exceptions import TransformerNotFittedError, SafeDsError +from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertableError from ._invertible_table_transformer import InvertibleTableTransformer @@ -27,8 +27,7 @@ class SequentialTableTransformer(InvertibleTableTransformer): def _init_( self, *, - transformers: list[TableTransformer]#, - #column_names: str | list[str] | None = None + transformers: list[TableTransformer] ) -> None: super().__init__(None) @@ -110,7 +109,6 @@ def transform(self, table:Table) -> Table: return current_table def inverse_transform(self, transformed_table:Table) -> Table: - #TODO: Replace SafeDsError with a custom error. """ Inversely transforms the table using all the transformers sequentially in inverse order. @@ -136,7 +134,7 @@ def inverse_transform(self, transformed_table:Table) -> Table: #check if transformer is invertable for transformer in self._transformers: if not (hasattr(transformer, "inverse_transform") and callable(getattr(transformer, "inverse_transform"))): - raise SafeDsError(str(type(transformer)) + " is not invertable!") + raise TransformerNotInvertableError(str(type(transformer))) #sequentially inverse transform the table with all transformers, working from the back of the list forwards. current_table: Table = transformed_table diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 2f84387c9..17d8ac18e 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -11,6 +11,7 @@ NonNumericColumnError, OutputLengthMismatchError, TransformerNotFittedError, + TransformerNotInvertableError, ValueNotPresentWhenFittedError, ) from ._ml import ( @@ -65,6 +66,7 @@ class OutOfBoundsError(SafeDsError): "NonNumericColumnError", "OutputLengthMismatchError", "TransformerNotFittedError", + "TransformerNotInvertableError", "ValueNotPresentWhenFittedError", # ML exceptions "DatasetMissesDataError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index dc729dae0..080da630f 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -111,6 +111,13 @@ def __init__(self) -> None: super().__init__("The transformer has not been fitted yet.") +class TransformerNotInvertableError(Exception): + """Raised when a function tries to invert a non-invertable transformer.""" + + def __init__(self, transformer_type: str) -> None: + super().__init__(f"{transformer_type} is not invertable.") + + class ValueNotPresentWhenFittedError(Exception): """Exception raised when attempting to one-hot-encode a table containing values not present in the fitting phase.""" From 3c5619a565ef71bf825ecf08ee36ba358e482a89 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 28 Jun 2024 15:15:10 +0200 Subject: [PATCH 03/17] all current tests passed. --- .../_sequential_table_transformer.py | 40 ++-- .../test_sequential_table_transformer.py | 192 ++++++++++++++++++ 2 files changed, 219 insertions(+), 13 deletions(-) create mode 100644 tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 1153d6f00..49522eafb 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -11,7 +11,7 @@ class SequentialTableTransformer(InvertibleTableTransformer): """ - The SequentialTableTransforrmer transforms a table using multiple trnasformers in sequence. + The SequentialTableTransforrmer transforms a table using multiple transformers in sequence. Parameters ---------- @@ -24,10 +24,11 @@ class SequentialTableTransformer(InvertibleTableTransformer): Raises a ValueError if the list of Transformers is None or contains no transformers. """ - def _init_( - self, - *, - transformers: list[TableTransformer] + def __init__( + self, + transformers: list[TableTransformer], + *, + column_names: str | list[str] | None = None ) -> None: super().__init__(None) @@ -47,6 +48,16 @@ def __hash__(self) -> int: self._transformers, self._is_fitted ) + + def is_fitted(self) -> bool: + """ + Whether the transformer is fitted. + + Returns + ------- + True, if the transformer is fitted, False otherwise. + """ + return self._is_fitted def fit(self, table: Table) -> SequentialTableTransformer: """ @@ -67,16 +78,19 @@ def fit(self, table: Table) -> SequentialTableTransformer: Raises a ValueError if the table has no rows. """ if table.row_count == 0: - raise ValueError("The SequentialTable cannot be fitted because the table contains 0 rows") + raise ValueError("The SequentialTableTransformer cannot be fitted because the table contains 0 rows.") current_table: Table = table - result: SequentialTableTransformer = SequentialTableTransformer( - transformer=self._transformers, column_names=self._column_names) + fitted_transformers: list[TableTransformer] = [] - for transformer in result._transformers: - transformer = transformer.fit(current_table) - current_table = transformer.transform(current_table) + for transformer in self._transformers: + fitted_transformer = transformer.fit(current_table) + fitted_transformers.append(fitted_transformer) + current_table = fitted_transformer.transform(current_table) + result: SequentialTableTransformer = SequentialTableTransformer( + transformers=fitted_transformers, column_names=self._column_names) + result._is_fitted = True return result @@ -125,8 +139,8 @@ def inverse_transform(self, transformed_table:Table) -> Table: ------ TransformerNotFittedError: Raises a TransformerNotFittedError if the transformer isn't fitted. - SafeDsError: - Raises a SafeDsError if one of the transformers isn't invertable. + TransformerNotInvertableError: + Raises a TransformerNotInvertableError if one of the transformers isn't invertable. """ if not self._is_fitted: raise TransformerNotFittedError diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py new file mode 100644 index 000000000..49ac6a98d --- /dev/null +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -0,0 +1,192 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import * +from safeds.exceptions import TransformerNotFittedError +from safeds.exceptions import TransformerNotInvertableError + +from tests.helpers import assert_tables_equal + + +class TestInit: + def test_should_raise_value_error_on_none(self) -> None: + with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): + SequentialTableTransformer(transformers = None) + + def test_should_raise_value_error_on_empty_list(self) -> None: + with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): + SequentialTableTransformer(transformers = []) + +class TestFit: + def test_should_raise_value_error_on_empty_table(self) -> None: + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + test_table = Table( + { + "col1": [], + "col2": [], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + with pytest.raises(ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows.")): + sequentialTableTransformer.fit(test_table) + + def test_fit_does_not_change_original_transformer(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformer_list = [one_hot, imputer] + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers=transformer_list) + old_hash = hash(sequentialTableTransformer) + sequentialTableTransformer.fit(test_table) + assert old_hash == hash(sequentialTableTransformer) + +class TestTransform: + def test_should_raise_if_not_fitted(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): + sequentialTableTransformer.transform(test_table) + + @pytest.mark.parametrize( + "transformers",[ + OneHotEncoder, + SimpleImputer(SimpleImputer.Strategy.constant(0)), + LabelEncoder(), + SimpleImputer(SimpleImputer.Strategy.mean())], + ids=["OneHotEncoder", "Imputer with Constant", "LabelEncoder", "Mean Imputer"], + ) + def test_should_do_same_as_transformer_with_single_transformer(self, transformer: TableTransformer): + sequential_transformer = SequentialTableTransformer([transformer]) + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + }, + ) + sequential_transformer = sequential_transformer.fit(test_table) + transformer = transformer.fit(test_table) + test_table_normal = transformer.transform(test_table) + test_table_sequential = sequential_transformer.transform(test_table) + assert_tables_equal(test_table_normal, test_table_sequential) + + def test_transforms_correctly_with_multiple_transformers(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + fitted_sequentialTableTransformer = sequentialTableTransformer.fit(test_table) + transfromed_table_sequential = fitted_sequentialTableTransformer.transform(test_table) + + one_hot = one_hot.fit(test_table) + transormed_table_individual = one_hot.transform(test_table) + imputer = imputer.fit(transormed_table_individual) + transormed_table_individual = imputer.transform(transormed_table_individual) + + assert_tables_equal(transfromed_table_sequential, transormed_table_individual) + +class TestIsFitted: + def test_should_return_false_before_fiting(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + sequentialTableTransformer = SequentialTableTransformer(transformers) + assert sequentialTableTransformer.is_fitted() == False + + def test_should_return_true_after_fiting(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + sequentialTableTransformer = sequentialTableTransformer.fit(test_table) + assert sequentialTableTransformer.is_fitted() == True + +class TestInverseTransform: + + @pytest.mark.parametrize( + "transformers",[ + [Discretizer(bin_count=3, column_names="col1")], + [SimpleImputer(SimpleImputer.Strategy.constant(0))], + [SimpleImputer(SimpleImputer.Strategy.constant(0)), Discretizer(bin_count=3)], + [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), SimpleImputer(SimpleImputer.Strategy.mean())], + ], + ids=["Discretizer", "SimpleImputer", "Multiple non-invertable", "invertable and non-invertable"], + ) + def test_should_raise_TransformerNotInvertableError_on_non_invertable_transformers(self,transformers): + test_table = Table( + { + "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], + "col2": ["a","a","c","b","a","a","c"], + "col3": [1,1,None,3,14,None,7], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + sequentialTableTransformer = sequentialTableTransformer.fit(test_table) + transformed_table = sequentialTableTransformer.transform(test_table) + with pytest.raises(TransformerNotInvertableError, match=r".*is not invertable."): + sequentialTableTransformer.inverse_transform(transformed_table) + + # Currently doesn't work as StandardScaler changes int to float and OneHotEncoder changes column order. + # @pytest.mark.parametrize( + # "transformers",[ + # [OneHotEncoder()], + # [OneHotEncoder(),StandardScaler()], + # [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler()], + # [LabelEncoder(),LabelEncoder()], + # ], + # ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], + # ) + # def test_should_return_original_table(self,transformers): + # test_table = Table( + # { + # "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], + # "col2": ["a","a","c","b","a","a","c"], + # "col3": [1,1,0,3,14,0,7], + # "col4": ["one", "two", "one", "two", "one", "two", "one"], + # }, + # ) + # sequentialTableTransformer = SequentialTableTransformer(transformers) + # sequentialTableTransformer = sequentialTableTransformer.fit(test_table) + # transformed_table = sequentialTableTransformer.transform(test_table) + # inverse_transformed_table = sequentialTableTransformer.inverse_transform(transformed_table) + # assert_tables_equal(test_table, inverse_transformed_table) + + def test_should_raise_TransformerNotFittedError_if_not_fited(self): + one_hot = OneHotEncoder() + imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) + transformers = [one_hot, imputer] + sequentialTableTransformer = SequentialTableTransformer(transformers) + test_table = Table( + { + "col1": [1,2,None], + "col2": ["a", "b", "a"], + } + ) + with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): + sequentialTableTransformer.inverse_transform(test_table) + From d847ab0fcf702f33c9b4de33ef3820fd6fa71c44 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 28 Jun 2024 16:34:54 +0200 Subject: [PATCH 04/17] improved tests --- tests/helpers/_assertions.py | 25 +++++++++++++++++-- .../test_sequential_table_transformer.py | 8 +++--- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 4c4847c6e..e65cba137 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -6,7 +6,14 @@ from safeds.data.tabular.containers import Cell, Column, Table -def assert_tables_equal(table1: Table, table2: Table) -> None: +def assert_tables_equal( + table1: Table, + table2: Table, + *, + ignore_column_order: bool = False, + ignore_row_order: bool = False, + ignore_types: bool = False, + check_exact: bool = False) -> None: """ Assert that two tables are almost equal. @@ -16,8 +23,22 @@ def assert_tables_equal(table1: Table, table2: Table) -> None: The first table. table2: The table to compare the first table to. + ignore_column_order: + Ignore the column order when True. Will return true, even when the column order is different. + ignore_row_order: + Ignore the column order when True. Will return true, even when the row order is different. + ignore_types: + Ignore differing data Types. Will return true, even when columns have differing data types. + check_exact: + If True, check, if floating point values match EXACTLY. """ - assert_frame_equal(table1._data_frame, table2._data_frame) + assert_frame_equal( + table1._data_frame, + table2._data_frame, + check_row_order = not ignore_row_order, + check_column_order = not ignore_column_order, + check_dtypes = not ignore_types, + check_exact=check_exact) def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None: diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index 49ac6a98d..127865bd2 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -151,12 +151,12 @@ def test_should_raise_TransformerNotInvertableError_on_non_invertable_transforme with pytest.raises(TransformerNotInvertableError, match=r".*is not invertable."): sequentialTableTransformer.inverse_transform(transformed_table) - # Currently doesn't work as StandardScaler changes int to float and OneHotEncoder changes column order. + # Fails because of floating point inprecision. 1.0000000000000004 != 1 # @pytest.mark.parametrize( # "transformers",[ # [OneHotEncoder()], - # [OneHotEncoder(),StandardScaler()], - # [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler()], + # [OneHotEncoder(),StandardScaler(column_names=["col1","col3"])], + # [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler(column_names=["col1","col3"])], # [LabelEncoder(),LabelEncoder()], # ], # ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], @@ -174,7 +174,7 @@ def test_should_raise_TransformerNotInvertableError_on_non_invertable_transforme # sequentialTableTransformer = sequentialTableTransformer.fit(test_table) # transformed_table = sequentialTableTransformer.transform(test_table) # inverse_transformed_table = sequentialTableTransformer.inverse_transform(transformed_table) - # assert_tables_equal(test_table, inverse_transformed_table) + # assert_tables_equal(test_table, inverse_transformed_table, ignore_column_order=True, ignore_types=True) def test_should_raise_TransformerNotFittedError_if_not_fited(self): one_hot = OneHotEncoder() From 4029628d484881d9125a7a943a085418755c569a Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 11:10:25 +0200 Subject: [PATCH 05/17] finished tests --- .../_sequential_table_transformer.py | 1 + .../test_sequential_table_transformer.py | 47 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 49522eafb..d269f53c2 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -125,6 +125,7 @@ def transform(self, table:Table) -> Table: def inverse_transform(self, transformed_table:Table) -> Table: """ Inversely transforms the table using all the transformers sequentially in inverse order. + Might change the order and type of columns base on the transformers used. Parameters ---------- diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index 127865bd2..8792e0aa9 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -151,30 +151,29 @@ def test_should_raise_TransformerNotInvertableError_on_non_invertable_transforme with pytest.raises(TransformerNotInvertableError, match=r".*is not invertable."): sequentialTableTransformer.inverse_transform(transformed_table) - # Fails because of floating point inprecision. 1.0000000000000004 != 1 - # @pytest.mark.parametrize( - # "transformers",[ - # [OneHotEncoder()], - # [OneHotEncoder(),StandardScaler(column_names=["col1","col3"])], - # [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler(column_names=["col1","col3"])], - # [LabelEncoder(),LabelEncoder()], - # ], - # ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], - # ) - # def test_should_return_original_table(self,transformers): - # test_table = Table( - # { - # "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], - # "col2": ["a","a","c","b","a","a","c"], - # "col3": [1,1,0,3,14,0,7], - # "col4": ["one", "two", "one", "two", "one", "two", "one"], - # }, - # ) - # sequentialTableTransformer = SequentialTableTransformer(transformers) - # sequentialTableTransformer = sequentialTableTransformer.fit(test_table) - # transformed_table = sequentialTableTransformer.transform(test_table) - # inverse_transformed_table = sequentialTableTransformer.inverse_transform(transformed_table) - # assert_tables_equal(test_table, inverse_transformed_table, ignore_column_order=True, ignore_types=True) + @pytest.mark.parametrize( + "transformers",[ + [OneHotEncoder()], + [OneHotEncoder(),StandardScaler(column_names=["col1","col3"])], + [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler(column_names=["col1","col3"])], + [LabelEncoder(),LabelEncoder()], + ], + ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], + ) + def test_should_return_original_table(self,transformers): + test_table = Table( + { + "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], + "col2": ["a","a","c","b","a","a","c"], + "col3": [1.0,1.0,0.0,3.0,14.0,0.0,7.0], + "col4": ["one", "two", "one", "two", "one", "two", "one"], + }, + ) + sequentialTableTransformer = SequentialTableTransformer(transformers) + sequentialTableTransformer = sequentialTableTransformer.fit(test_table) + transformed_table = sequentialTableTransformer.transform(test_table) + inverse_transformed_table = sequentialTableTransformer.inverse_transform(transformed_table) + assert_tables_equal(test_table, inverse_transformed_table, ignore_column_order=True, ignore_types=True) def test_should_raise_TransformerNotFittedError_if_not_fited(self): one_hot = OneHotEncoder() From 8254b2e37f5ce23589e650f5c28cac4283d970c3 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 11:17:03 +0200 Subject: [PATCH 06/17] finished SequentialTableTransformer --- .../data/tabular/transformation/_sequential_table_transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index d269f53c2..63a5a01f8 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -97,6 +97,7 @@ def fit(self, table: Table) -> SequentialTableTransformer: def transform(self, table:Table) -> Table: """ Transforms the table using all the transformers sequentially. + Might change the order and type of columns base on the transformers used. Parameters ---------- From a47cdbee75f73685acf60ec275a3fabf20136922 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 11:37:32 +0200 Subject: [PATCH 07/17] last minute typo fix (changed invertable to invertible) --- .../transformation/_sequential_table_transformer.py | 10 +++++----- src/safeds/exceptions/__init__.py | 4 ++-- src/safeds/exceptions/_data.py | 6 +++--- .../test_sequential_table_transformer.py | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 63a5a01f8..96f72814c 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -5,7 +5,7 @@ from safeds._utils import _structural_hash from safeds.data.tabular.containers import Table from ._table_transformer import TableTransformer -from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertableError +from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError from ._invertible_table_transformer import InvertibleTableTransformer @@ -141,16 +141,16 @@ def inverse_transform(self, transformed_table:Table) -> Table: ------ TransformerNotFittedError: Raises a TransformerNotFittedError if the transformer isn't fitted. - TransformerNotInvertableError: - Raises a TransformerNotInvertableError if one of the transformers isn't invertable. + TransformerNotInvertibleError: + Raises a TransformerNotInvertibleError if one of the transformers isn't invertible. """ if not self._is_fitted: raise TransformerNotFittedError - #check if transformer is invertable + #check if transformer is invertible for transformer in self._transformers: if not (hasattr(transformer, "inverse_transform") and callable(getattr(transformer, "inverse_transform"))): - raise TransformerNotInvertableError(str(type(transformer))) + raise TransformerNotInvertibleError(str(type(transformer))) #sequentially inverse transform the table with all transformers, working from the back of the list forwards. current_table: Table = transformed_table diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 17d8ac18e..eae053019 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -11,7 +11,7 @@ NonNumericColumnError, OutputLengthMismatchError, TransformerNotFittedError, - TransformerNotInvertableError, + TransformerNotInvertibleError, ValueNotPresentWhenFittedError, ) from ._ml import ( @@ -66,7 +66,7 @@ class OutOfBoundsError(SafeDsError): "NonNumericColumnError", "OutputLengthMismatchError", "TransformerNotFittedError", - "TransformerNotInvertableError", + "TransformerNotInvertibleError", "ValueNotPresentWhenFittedError", # ML exceptions "DatasetMissesDataError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 080da630f..775cc1847 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -111,11 +111,11 @@ def __init__(self) -> None: super().__init__("The transformer has not been fitted yet.") -class TransformerNotInvertableError(Exception): - """Raised when a function tries to invert a non-invertable transformer.""" +class TransformerNotInvertibleError(Exception): + """Raised when a function tries to invert a non-invertible transformer.""" def __init__(self, transformer_type: str) -> None: - super().__init__(f"{transformer_type} is not invertable.") + super().__init__(f"{transformer_type} is not invertible.") class ValueNotPresentWhenFittedError(Exception): diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index 8792e0aa9..ad0bffba6 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -2,7 +2,7 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import * from safeds.exceptions import TransformerNotFittedError -from safeds.exceptions import TransformerNotInvertableError +from safeds.exceptions import TransformerNotInvertibleError from tests.helpers import assert_tables_equal @@ -135,9 +135,9 @@ class TestInverseTransform: [SimpleImputer(SimpleImputer.Strategy.constant(0)), Discretizer(bin_count=3)], [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), SimpleImputer(SimpleImputer.Strategy.mean())], ], - ids=["Discretizer", "SimpleImputer", "Multiple non-invertable", "invertable and non-invertable"], + ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], ) - def test_should_raise_TransformerNotInvertableError_on_non_invertable_transformers(self,transformers): + def test_should_raise_TransformerNotInvertibleError_on_non_invertible_transformers(self,transformers): test_table = Table( { "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], @@ -148,7 +148,7 @@ def test_should_raise_TransformerNotInvertableError_on_non_invertable_transforme sequentialTableTransformer = SequentialTableTransformer(transformers) sequentialTableTransformer = sequentialTableTransformer.fit(test_table) transformed_table = sequentialTableTransformer.transform(test_table) - with pytest.raises(TransformerNotInvertableError, match=r".*is not invertable."): + with pytest.raises(TransformerNotInvertibleError, match=r".*is not invertible."): sequentialTableTransformer.inverse_transform(transformed_table) @pytest.mark.parametrize( From ab706f4e69b62aced9f184c2708cd3b24775438c Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 12:00:43 +0200 Subject: [PATCH 08/17] fixed documentation --- .../tabular/transformation/_sequential_table_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 96f72814c..c40198815 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -130,7 +130,7 @@ def inverse_transform(self, transformed_table:Table) -> Table: Parameters ---------- - table: + transformed_table: The table to be transformed back. Returns From b5d1393a277ff55f6bc5bf825a2fed04faf43872 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 15:07:07 +0200 Subject: [PATCH 09/17] fixed tests --- .../transformation/test_sequential_table_transformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index ad0bffba6..d3537517e 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -62,8 +62,8 @@ def test_should_raise_if_not_fitted(self): sequentialTableTransformer.transform(test_table) @pytest.mark.parametrize( - "transformers",[ - OneHotEncoder, + "transformer",[ + OneHotEncoder(), SimpleImputer(SimpleImputer.Strategy.constant(0)), LabelEncoder(), SimpleImputer(SimpleImputer.Strategy.mean())], From d24a14a0609de4a34697d9bf153de6754cce73fc Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 5 Jul 2024 15:59:16 +0200 Subject: [PATCH 10/17] fixing linter errors --- .../_sequential_table_transformer.py | 25 +++--- .../test_sequential_table_transformer.py | 85 ++++++++++--------- 2 files changed, 61 insertions(+), 49 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index c40198815..f1e3c6a7d 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -1,13 +1,17 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.data.tabular.containers import Table -from ._table_transformer import TableTransformer from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError -from ._invertible_table_transformer import InvertibleTableTransformer +from ._invertible_table_transformer import InvertibleTableTransformer + +if TYPE_CHECKING: + from safeds.data.tabular.containers import Table + + from ._table_transformer import TableTransformer + class SequentialTableTransformer(InvertibleTableTransformer): """ @@ -28,12 +32,12 @@ def __init__( self, transformers: list[TableTransformer], *, - column_names: str | list[str] | None = None + column_names: str | list[str] | None = None, # noqa: ARG002 ) -> None: super().__init__(None) #Check if transformers actually contains any transformers. - if transformers == None or len(transformers) == 0: + if transformers is None or len(transformers) == 0: raise ValueError("transformers must contain at least 1 transformer") # Parameters @@ -46,7 +50,7 @@ def __hash__(self) -> int: return _structural_hash( super().__hash__(), self._transformers, - self._is_fitted + self._is_fitted, ) def is_fitted(self) -> bool: @@ -96,7 +100,8 @@ def fit(self, table: Table) -> SequentialTableTransformer: def transform(self, table:Table) -> Table: """ - Transforms the table using all the transformers sequentially. + Transform the table using all the transformers sequentially. + Might change the order and type of columns base on the transformers used. Parameters @@ -113,7 +118,6 @@ def transform(self, table:Table) -> Table: TransformerNotFittedError: Raises a TransformerNotFittedError if the transformer isn't fitted. """ - if not self._is_fitted: raise TransformerNotFittedError @@ -126,6 +130,7 @@ def transform(self, table:Table) -> Table: def inverse_transform(self, transformed_table:Table) -> Table: """ Inversely transforms the table using all the transformers sequentially in inverse order. + Might change the order and type of columns base on the transformers used. Parameters @@ -149,7 +154,7 @@ def inverse_transform(self, transformed_table:Table) -> Table: #check if transformer is invertible for transformer in self._transformers: - if not (hasattr(transformer, "inverse_transform") and callable(getattr(transformer, "inverse_transform"))): + if not (hasattr(transformer, "inverse_transform") and callable(transformer.inverse_transform)): raise TransformerNotInvertibleError(str(type(transformer))) #sequentially inverse transform the table with all transformers, working from the back of the list forwards. diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index d3537517e..dbc7f3a83 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -1,10 +1,17 @@ import pytest from safeds.data.tabular.containers import Table -from safeds.data.tabular.transformation import * -from safeds.exceptions import TransformerNotFittedError -from safeds.exceptions import TransformerNotInvertibleError +from safeds.data.tabular.transformation import ( + Discretizer, + LabelEncoder, + OneHotEncoder, + SequentialTableTransformer, + SimpleImputer, + StandardScaler, +) +from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError from tests.helpers import assert_tables_equal +from tests.safeds.data.tabular.transformation import test_table_transformer class TestInit: @@ -27,11 +34,11 @@ def test_should_raise_value_error_on_empty_table(self) -> None: "col2": [], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) + sequential_table_transformer = SequentialTableTransformer(transformers) with pytest.raises(ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows.")): - sequentialTableTransformer.fit(test_table) + sequential_table_transformer.fit(test_table) - def test_fit_does_not_change_original_transformer(self): + def test_fit_does_not_change_original_transformer(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformer_list = [one_hot, imputer] @@ -41,13 +48,13 @@ def test_fit_does_not_change_original_transformer(self): "col2": ["a", "b", "a"], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers=transformer_list) - old_hash = hash(sequentialTableTransformer) - sequentialTableTransformer.fit(test_table) - assert old_hash == hash(sequentialTableTransformer) + sequential_table_transformer = SequentialTableTransformer(transformers=transformer_list) + old_hash = hash(sequential_table_transformer) + sequential_table_transformer.fit(test_table) + assert old_hash == hash(sequential_table_transformer) class TestTransform: - def test_should_raise_if_not_fitted(self): + def test_should_raise_if_not_fitted(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] @@ -57,9 +64,9 @@ def test_should_raise_if_not_fitted(self): "col2": ["a", "b", "a"], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) + sequential_table_transformer = SequentialTableTransformer(transformers) with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): - sequentialTableTransformer.transform(test_table) + sequential_table_transformer.transform(test_table) @pytest.mark.parametrize( "transformer",[ @@ -69,7 +76,7 @@ def test_should_raise_if_not_fitted(self): SimpleImputer(SimpleImputer.Strategy.mean())], ids=["OneHotEncoder", "Imputer with Constant", "LabelEncoder", "Mean Imputer"], ) - def test_should_do_same_as_transformer_with_single_transformer(self, transformer: TableTransformer): + def test_should_do_same_as_transformer_with_single_transformer(self, transformer: test_table_transformer) -> None: sequential_transformer = SequentialTableTransformer([transformer]) test_table = Table( { @@ -83,7 +90,7 @@ def test_should_do_same_as_transformer_with_single_transformer(self, transformer test_table_sequential = sequential_transformer.transform(test_table) assert_tables_equal(test_table_normal, test_table_sequential) - def test_transforms_correctly_with_multiple_transformers(self): + def test_transforms_correctly_with_multiple_transformers(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] @@ -93,9 +100,9 @@ def test_transforms_correctly_with_multiple_transformers(self): "col2": ["a", "b", "a"], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) - fitted_sequentialTableTransformer = sequentialTableTransformer.fit(test_table) - transfromed_table_sequential = fitted_sequentialTableTransformer.transform(test_table) + sequential_table_transformer = SequentialTableTransformer(transformers) + fitted_sequential_table_transformer = sequential_table_transformer.fit(test_table) + transfromed_table_sequential = fitted_sequential_table_transformer.transform(test_table) one_hot = one_hot.fit(test_table) transormed_table_individual = one_hot.transform(test_table) @@ -105,14 +112,14 @@ def test_transforms_correctly_with_multiple_transformers(self): assert_tables_equal(transfromed_table_sequential, transormed_table_individual) class TestIsFitted: - def test_should_return_false_before_fiting(self): + def test_should_return_false_before_fiting(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] - sequentialTableTransformer = SequentialTableTransformer(transformers) - assert sequentialTableTransformer.is_fitted() == False + sequential_table_transformer = SequentialTableTransformer(transformers) + assert sequential_table_transformer.is_fitted() is False - def test_should_return_true_after_fiting(self): + def test_should_return_true_after_fiting(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] @@ -122,9 +129,9 @@ def test_should_return_true_after_fiting(self): "col2": ["a", "b", "a"], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) - sequentialTableTransformer = sequentialTableTransformer.fit(test_table) - assert sequentialTableTransformer.is_fitted() == True + sequential_table_transformer = SequentialTableTransformer(transformers) + sequential_table_transformer = sequential_table_transformer.fit(test_table) + assert sequential_table_transformer.is_fitted() is True class TestInverseTransform: @@ -137,7 +144,7 @@ class TestInverseTransform: ], ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], ) - def test_should_raise_TransformerNotInvertibleError_on_non_invertible_transformers(self,transformers): + def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers(self,transformers) -> None: test_table = Table( { "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], @@ -145,11 +152,11 @@ def test_should_raise_TransformerNotInvertibleError_on_non_invertible_transforme "col3": [1,1,None,3,14,None,7], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) - sequentialTableTransformer = sequentialTableTransformer.fit(test_table) - transformed_table = sequentialTableTransformer.transform(test_table) + sequential_table_transformer = SequentialTableTransformer(transformers) + sequential_table_transformer = sequential_table_transformer.fit(test_table) + transformed_table = sequential_table_transformer.transform(test_table) with pytest.raises(TransformerNotInvertibleError, match=r".*is not invertible."): - sequentialTableTransformer.inverse_transform(transformed_table) + sequential_table_transformer.inverse_transform(transformed_table) @pytest.mark.parametrize( "transformers",[ @@ -160,7 +167,7 @@ def test_should_raise_TransformerNotInvertibleError_on_non_invertible_transforme ], ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], ) - def test_should_return_original_table(self,transformers): + def test_should_return_original_table(self,transformers) -> None: test_table = Table( { "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], @@ -169,23 +176,23 @@ def test_should_return_original_table(self,transformers): "col4": ["one", "two", "one", "two", "one", "two", "one"], }, ) - sequentialTableTransformer = SequentialTableTransformer(transformers) - sequentialTableTransformer = sequentialTableTransformer.fit(test_table) - transformed_table = sequentialTableTransformer.transform(test_table) - inverse_transformed_table = sequentialTableTransformer.inverse_transform(transformed_table) + sequential_table_transformer = SequentialTableTransformer(transformers) + sequential_table_transformer = sequential_table_transformer.fit(test_table) + transformed_table = sequential_table_transformer.transform(test_table) + inverse_transformed_table = sequential_table_transformer.inverse_transform(transformed_table) assert_tables_equal(test_table, inverse_transformed_table, ignore_column_order=True, ignore_types=True) - def test_should_raise_TransformerNotFittedError_if_not_fited(self): + def test_should_raise_transformer_not_fitted_error_if_not_fited(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] - sequentialTableTransformer = SequentialTableTransformer(transformers) + sequential_table_transformer = SequentialTableTransformer(transformers) test_table = Table( { "col1": [1,2,None], "col2": ["a", "b", "a"], - } + }, ) with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): - sequentialTableTransformer.inverse_transform(test_table) + sequential_table_transformer.inverse_transform(test_table) From 445d9ac7aee9e2b1480cd6cee9c1233bff088349 Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 12 Jul 2024 10:15:46 +0200 Subject: [PATCH 11/17] fixed mypy errors --- .../_sequential_table_transformer.py | 9 ++++----- .../test_sequential_table_transformer.py | 17 +++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index f1e3c6a7d..676c526a0 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -53,6 +53,7 @@ def __hash__(self) -> int: self._is_fitted, ) + @property def is_fitted(self) -> bool: """ Whether the transformer is fitted. @@ -151,15 +152,13 @@ def inverse_transform(self, transformed_table:Table) -> Table: """ if not self._is_fitted: raise TransformerNotFittedError - - #check if transformer is invertible - for transformer in self._transformers: - if not (hasattr(transformer, "inverse_transform") and callable(transformer.inverse_transform)): - raise TransformerNotInvertibleError(str(type(transformer))) #sequentially inverse transform the table with all transformers, working from the back of the list forwards. current_table: Table = transformed_table for transformer in reversed(self._transformers): + #check if transformer is invertable + if not (isinstance(transformer, InvertibleTableTransformer)): + raise TransformerNotInvertibleError(str(type(transformer))) current_table = transformer.inverse_transform(current_table) return current_table \ No newline at end of file diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index dbc7f3a83..7802f8365 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -7,21 +7,18 @@ SequentialTableTransformer, SimpleImputer, StandardScaler, + TableTransformer, ) from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError from tests.helpers import assert_tables_equal -from tests.safeds.data.tabular.transformation import test_table_transformer class TestInit: - def test_should_raise_value_error_on_none(self) -> None: - with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): - SequentialTableTransformer(transformers = None) def test_should_raise_value_error_on_empty_list(self) -> None: with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): - SequentialTableTransformer(transformers = []) + SequentialTableTransformer(transformers = []) # type: ignore # noqa: PGH003 class TestFit: def test_should_raise_value_error_on_empty_table(self) -> None: @@ -76,7 +73,7 @@ def test_should_raise_if_not_fitted(self) -> None: SimpleImputer(SimpleImputer.Strategy.mean())], ids=["OneHotEncoder", "Imputer with Constant", "LabelEncoder", "Mean Imputer"], ) - def test_should_do_same_as_transformer_with_single_transformer(self, transformer: test_table_transformer) -> None: + def test_should_do_same_as_transformer_with_single_transformer(self, transformer: TableTransformer) -> None: sequential_transformer = SequentialTableTransformer([transformer]) test_table = Table( { @@ -117,7 +114,7 @@ def test_should_return_false_before_fiting(self) -> None: imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] sequential_table_transformer = SequentialTableTransformer(transformers) - assert sequential_table_transformer.is_fitted() is False + assert sequential_table_transformer.is_fitted is False def test_should_return_true_after_fiting(self) -> None: one_hot = OneHotEncoder() @@ -131,7 +128,7 @@ def test_should_return_true_after_fiting(self) -> None: ) sequential_table_transformer = SequentialTableTransformer(transformers) sequential_table_transformer = sequential_table_transformer.fit(test_table) - assert sequential_table_transformer.is_fitted() is True + assert sequential_table_transformer.is_fitted is True class TestInverseTransform: @@ -144,7 +141,7 @@ class TestInverseTransform: ], ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], ) - def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers(self,transformers) -> None: + def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers(self,transformers: list[TableTransformer]) -> None: test_table = Table( { "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], @@ -167,7 +164,7 @@ def test_should_raise_transformer_not_invertible_error_on_non_invertible_transfo ], ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], ) - def test_should_return_original_table(self,transformers) -> None: + def test_should_return_original_table(self,transformers: list[TableTransformer]) -> None: test_table = Table( { "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], From b2ea3c1a0a34a389daa7ea16603ae9862beac5eb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:17:24 +0000 Subject: [PATCH 12/17] style: apply automated linter fixes --- .../data/tabular/transformation/__init__.py | 2 +- .../_sequential_table_transformer.py | 43 ++++---- tests/helpers/_assertions.py | 28 ++--- .../test_sequential_table_transformer.py | 100 +++++++++++------- 4 files changed, 97 insertions(+), 76 deletions(-) diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index 0c721d3d4..0008e7381 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -11,8 +11,8 @@ from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder from ._range_scaler import RangeScaler - from ._sequential_table_transformer import SequentialTableTransformer from ._robust_scaler import RobustScaler + from ._sequential_table_transformer import SequentialTableTransformer from ._simple_imputer import SimpleImputer from ._standard_scaler import StandardScaler from ._table_transformer import TableTransformer diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 676c526a0..28d8a3617 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -21,7 +21,7 @@ class SequentialTableTransformer(InvertibleTableTransformer): ---------- transformers: The list of transformers used to transform the table. Used in the order as they are supplied in the list. - + Raises ------ ValueError: @@ -36,7 +36,7 @@ def __init__( ) -> None: super().__init__(None) - #Check if transformers actually contains any transformers. + # Check if transformers actually contains any transformers. if transformers is None or len(transformers) == 0: raise ValueError("transformers must contain at least 1 transformer") @@ -52,7 +52,7 @@ def __hash__(self) -> int: self._transformers, self._is_fitted, ) - + @property def is_fitted(self) -> bool: """ @@ -70,9 +70,9 @@ def fit(self, table: Table) -> SequentialTableTransformer: Parameters ---------- - table: + table: The table used to fit the transformers. - + Returns ------- The fitted transformer. @@ -92,14 +92,15 @@ def fit(self, table: Table) -> SequentialTableTransformer: fitted_transformer = transformer.fit(current_table) fitted_transformers.append(fitted_transformer) current_table = fitted_transformer.transform(current_table) - + result: SequentialTableTransformer = SequentialTableTransformer( - transformers=fitted_transformers, column_names=self._column_names) + transformers=fitted_transformers, column_names=self._column_names, + ) result._is_fitted = True return result - - def transform(self, table:Table) -> Table: + + def transform(self, table: Table) -> Table: """ Transform the table using all the transformers sequentially. @@ -107,9 +108,9 @@ def transform(self, table:Table) -> Table: Parameters ---------- - table: + table: The table to be transformed. - + Returns ------- The transformed table. @@ -125,20 +126,20 @@ def transform(self, table:Table) -> Table: current_table: Table = table for transformer in self._transformers: current_table = transformer.transform(current_table) - + return current_table - - def inverse_transform(self, transformed_table:Table) -> Table: + + def inverse_transform(self, transformed_table: Table) -> Table: """ Inversely transforms the table using all the transformers sequentially in inverse order. - + Might change the order and type of columns base on the transformers used. Parameters ---------- - transformed_table: + transformed_table: The table to be transformed back. - + Returns ------- The untranformed table. @@ -153,12 +154,12 @@ def inverse_transform(self, transformed_table:Table) -> Table: if not self._is_fitted: raise TransformerNotFittedError - #sequentially inverse transform the table with all transformers, working from the back of the list forwards. + # sequentially inverse transform the table with all transformers, working from the back of the list forwards. current_table: Table = transformed_table for transformer in reversed(self._transformers): - #check if transformer is invertable + # check if transformer is invertable if not (isinstance(transformer, InvertibleTableTransformer)): raise TransformerNotInvertibleError(str(type(transformer))) current_table = transformer.inverse_transform(current_table) - - return current_table \ No newline at end of file + + return current_table diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index e65cba137..8d78090b2 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -7,13 +7,14 @@ def assert_tables_equal( - table1: Table, - table2: Table, - *, - ignore_column_order: bool = False, - ignore_row_order: bool = False, - ignore_types: bool = False, - check_exact: bool = False) -> None: + table1: Table, + table2: Table, + *, + ignore_column_order: bool = False, + ignore_row_order: bool = False, + ignore_types: bool = False, + check_exact: bool = False, +) -> None: """ Assert that two tables are almost equal. @@ -33,12 +34,13 @@ def assert_tables_equal( If True, check, if floating point values match EXACTLY. """ assert_frame_equal( - table1._data_frame, - table2._data_frame, - check_row_order = not ignore_row_order, - check_column_order = not ignore_column_order, - check_dtypes = not ignore_types, - check_exact=check_exact) + table1._data_frame, + table2._data_frame, + check_row_order=not ignore_row_order, + check_column_order=not ignore_column_order, + check_dtypes=not ignore_types, + check_exact=check_exact, + ) def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: TabularDataset) -> None: diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index 7802f8365..a32fdaa2c 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -15,10 +15,11 @@ class TestInit: - + def test_should_raise_value_error_on_empty_list(self) -> None: with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): - SequentialTableTransformer(transformers = []) # type: ignore # noqa: PGH003 + SequentialTableTransformer(transformers=[]) # type: ignore # noqa: PGH003 + class TestFit: def test_should_raise_value_error_on_empty_table(self) -> None: @@ -32,16 +33,18 @@ def test_should_raise_value_error_on_empty_table(self) -> None: }, ) sequential_table_transformer = SequentialTableTransformer(transformers) - with pytest.raises(ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows.")): + with pytest.raises( + ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows."), + ): sequential_table_transformer.fit(test_table) - + def test_fit_does_not_change_original_transformer(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformer_list = [one_hot, imputer] test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) @@ -50,6 +53,7 @@ def test_fit_does_not_change_original_transformer(self) -> None: sequential_table_transformer.fit(test_table) assert old_hash == hash(sequential_table_transformer) + class TestTransform: def test_should_raise_if_not_fitted(self) -> None: one_hot = OneHotEncoder() @@ -57,27 +61,29 @@ def test_should_raise_if_not_fitted(self) -> None: transformers = [one_hot, imputer] test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) sequential_table_transformer = SequentialTableTransformer(transformers) with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): sequential_table_transformer.transform(test_table) - + @pytest.mark.parametrize( - "transformer",[ - OneHotEncoder(), - SimpleImputer(SimpleImputer.Strategy.constant(0)), - LabelEncoder(), - SimpleImputer(SimpleImputer.Strategy.mean())], - ids=["OneHotEncoder", "Imputer with Constant", "LabelEncoder", "Mean Imputer"], + "transformer", + [ + OneHotEncoder(), + SimpleImputer(SimpleImputer.Strategy.constant(0)), + LabelEncoder(), + SimpleImputer(SimpleImputer.Strategy.mean()), + ], + ids=["OneHotEncoder", "Imputer with Constant", "LabelEncoder", "Mean Imputer"], ) def test_should_do_same_as_transformer_with_single_transformer(self, transformer: TableTransformer) -> None: sequential_transformer = SequentialTableTransformer([transformer]) test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) @@ -93,7 +99,7 @@ def test_transforms_correctly_with_multiple_transformers(self) -> None: transformers = [one_hot, imputer] test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) @@ -108,6 +114,7 @@ def test_transforms_correctly_with_multiple_transformers(self) -> None: assert_tables_equal(transfromed_table_sequential, transormed_table_individual) + class TestIsFitted: def test_should_return_false_before_fiting(self) -> None: one_hot = OneHotEncoder() @@ -115,14 +122,14 @@ def test_should_return_false_before_fiting(self) -> None: transformers = [one_hot, imputer] sequential_table_transformer = SequentialTableTransformer(transformers) assert sequential_table_transformer.is_fitted is False - + def test_should_return_true_after_fiting(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) @@ -130,23 +137,30 @@ def test_should_return_true_after_fiting(self) -> None: sequential_table_transformer = sequential_table_transformer.fit(test_table) assert sequential_table_transformer.is_fitted is True + class TestInverseTransform: @pytest.mark.parametrize( - "transformers",[ - [Discretizer(bin_count=3, column_names="col1")], - [SimpleImputer(SimpleImputer.Strategy.constant(0))], - [SimpleImputer(SimpleImputer.Strategy.constant(0)), Discretizer(bin_count=3)], - [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), SimpleImputer(SimpleImputer.Strategy.mean())], - ], - ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], + "transformers", + [ + [Discretizer(bin_count=3, column_names="col1")], + [SimpleImputer(SimpleImputer.Strategy.constant(0))], + [SimpleImputer(SimpleImputer.Strategy.constant(0)), Discretizer(bin_count=3)], + [ + LabelEncoder(column_names="col2", partial_order=["a", "b", "c"]), + SimpleImputer(SimpleImputer.Strategy.mean()), + ], + ], + ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], ) - def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers(self,transformers: list[TableTransformer]) -> None: + def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers( + self, transformers: list[TableTransformer], + ) -> None: test_table = Table( { - "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], - "col2": ["a","a","c","b","a","a","c"], - "col3": [1,1,None,3,14,None,7], + "col1": [0.1, 0.113, 0.232, 1.199, 2.33, 2.01, 2.99], + "col2": ["a", "a", "c", "b", "a", "a", "c"], + "col3": [1, 1, None, 3, 14, None, 7], }, ) sequential_table_transformer = SequentialTableTransformer(transformers) @@ -156,20 +170,25 @@ def test_should_raise_transformer_not_invertible_error_on_non_invertible_transfo sequential_table_transformer.inverse_transform(transformed_table) @pytest.mark.parametrize( - "transformers",[ - [OneHotEncoder()], - [OneHotEncoder(),StandardScaler(column_names=["col1","col3"])], - [LabelEncoder(column_names="col2", partial_order=["a","b","c"]), OneHotEncoder(), StandardScaler(column_names=["col1","col3"])], - [LabelEncoder(),LabelEncoder()], - ], - ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], + "transformers", + [ + [OneHotEncoder()], + [OneHotEncoder(), StandardScaler(column_names=["col1", "col3"])], + [ + LabelEncoder(column_names="col2", partial_order=["a", "b", "c"]), + OneHotEncoder(), + StandardScaler(column_names=["col1", "col3"]), + ], + [LabelEncoder(), LabelEncoder()], + ], + ids=["1 Transformer", "2 Transformers", "3 Transformers", "Duplicate Transformers"], ) - def test_should_return_original_table(self,transformers: list[TableTransformer]) -> None: + def test_should_return_original_table(self, transformers: list[TableTransformer]) -> None: test_table = Table( { - "col1": [0.1,0.113,0.232,1.199,2.33,2.01,2.99], - "col2": ["a","a","c","b","a","a","c"], - "col3": [1.0,1.0,0.0,3.0,14.0,0.0,7.0], + "col1": [0.1, 0.113, 0.232, 1.199, 2.33, 2.01, 2.99], + "col2": ["a", "a", "c", "b", "a", "a", "c"], + "col3": [1.0, 1.0, 0.0, 3.0, 14.0, 0.0, 7.0], "col4": ["one", "two", "one", "two", "one", "two", "one"], }, ) @@ -186,10 +205,9 @@ def test_should_raise_transformer_not_fitted_error_if_not_fited(self) -> None: sequential_table_transformer = SequentialTableTransformer(transformers) test_table = Table( { - "col1": [1,2,None], + "col1": [1, 2, None], "col2": ["a", "b", "a"], }, ) with pytest.raises(TransformerNotFittedError, match=r"The transformer has not been fitted yet."): sequential_table_transformer.inverse_transform(test_table) - From 7097f7df9442023a8a268859d3df1b05a161708f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:18:52 +0000 Subject: [PATCH 13/17] style: apply automated linter fixes --- .../tabular/transformation/_sequential_table_transformer.py | 3 ++- .../transformation/test_sequential_table_transformer.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 28d8a3617..4f27fb2a6 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -94,7 +94,8 @@ def fit(self, table: Table) -> SequentialTableTransformer: current_table = fitted_transformer.transform(current_table) result: SequentialTableTransformer = SequentialTableTransformer( - transformers=fitted_transformers, column_names=self._column_names, + transformers=fitted_transformers, + column_names=self._column_names, ) result._is_fitted = True diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index a32fdaa2c..309db8e4f 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -34,7 +34,8 @@ def test_should_raise_value_error_on_empty_table(self) -> None: ) sequential_table_transformer = SequentialTableTransformer(transformers) with pytest.raises( - ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows."), + ValueError, + match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows."), ): sequential_table_transformer.fit(test_table) @@ -154,7 +155,8 @@ class TestInverseTransform: ids=["Discretizer", "SimpleImputer", "Multiple non-invertible", "invertible and non-invertible"], ) def test_should_raise_transformer_not_invertible_error_on_non_invertible_transformers( - self, transformers: list[TableTransformer], + self, + transformers: list[TableTransformer], ) -> None: test_table = Table( { From 946ccc6c199eb74c3156d82f2e4eb371e07dfe4f Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 12 Jul 2024 14:27:07 +0200 Subject: [PATCH 14/17] minor code improvements --- .../_sequential_table_transformer.py | 17 +++++++---- tests/helpers/_assertions.py | 8 ++--- .../test_sequential_table_transformer.py | 29 +++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 4f27fb2a6..838ca7af7 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from warnings import warn from safeds._utils import _structural_hash from safeds.exceptions import TransformerNotFittedError, TransformerNotInvertibleError @@ -25,20 +26,27 @@ class SequentialTableTransformer(InvertibleTableTransformer): Raises ------ ValueError: - Raises a ValueError if the list of Transformers is None or contains no transformers. + Raises a ValueError if the list of Transformers is None. """ def __init__( self, transformers: list[TableTransformer], - *, - column_names: str | list[str] | None = None, # noqa: ARG002 ) -> None: super().__init__(None) + if transformers is None: + raise ValueError("transformers can't be None") + # Check if transformers actually contains any transformers. if transformers is None or len(transformers) == 0: - raise ValueError("transformers must contain at least 1 transformer") + warn( + ( + "transformers should contain at least 1 transformer" + ), + UserWarning, + stacklevel=2, + ) # Parameters self._transformers: list[TableTransformer] = transformers @@ -95,7 +103,6 @@ def fit(self, table: Table) -> SequentialTableTransformer: result: SequentialTableTransformer = SequentialTableTransformer( transformers=fitted_transformers, - column_names=self._column_names, ) result._is_fitted = True diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 8d78090b2..16cb4f3cf 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -13,7 +13,7 @@ def assert_tables_equal( ignore_column_order: bool = False, ignore_row_order: bool = False, ignore_types: bool = False, - check_exact: bool = False, + ignore_float_imprecision: bool = True, ) -> None: """ Assert that two tables are almost equal. @@ -30,8 +30,8 @@ def assert_tables_equal( Ignore the column order when True. Will return true, even when the row order is different. ignore_types: Ignore differing data Types. Will return true, even when columns have differing data types. - check_exact: - If True, check, if floating point values match EXACTLY. + ignore_float_imprecision: + If False, check if floating point values match EXACTLY. """ assert_frame_equal( table1._data_frame, @@ -39,7 +39,7 @@ def assert_tables_equal( check_row_order=not ignore_row_order, check_column_order=not ignore_column_order, check_dtypes=not ignore_types, - check_exact=check_exact, + check_exact=not ignore_float_imprecision, ) diff --git a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py index 309db8e4f..634c59aeb 100644 --- a/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py +++ b/tests/safeds/data/tabular/transformation/test_sequential_table_transformer.py @@ -16,30 +16,27 @@ class TestInit: - def test_should_raise_value_error_on_empty_list(self) -> None: - with pytest.raises(ValueError, match=("transformers must contain at least 1 transformer")): - SequentialTableTransformer(transformers=[]) # type: ignore # noqa: PGH003 + def test_should_warn_on_empty_list(self) -> None: + with pytest.warns(UserWarning, match=("transformers should contain at least 1 transformer")): + SequentialTableTransformer(transformers=[]) # type: ignore[attr-defined] class TestFit: def test_should_raise_value_error_on_empty_table(self) -> None: - one_hot = OneHotEncoder() - imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) - transformers = [one_hot, imputer] test_table = Table( { "col1": [], "col2": [], }, ) - sequential_table_transformer = SequentialTableTransformer(transformers) + sequential_table_transformer = SequentialTableTransformer([SimpleImputer(SimpleImputer.Strategy.constant(0))]) with pytest.raises( ValueError, match=("The SequentialTableTransformer cannot be fitted because the table contains 0 rows."), ): sequential_table_transformer.fit(test_table) - def test_fit_does_not_change_original_transformer(self) -> None: + def test_should_not_change_original_transformer(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformer_list = [one_hot, imputer] @@ -94,7 +91,7 @@ def test_should_do_same_as_transformer_with_single_transformer(self, transformer test_table_sequential = sequential_transformer.transform(test_table) assert_tables_equal(test_table_normal, test_table_sequential) - def test_transforms_correctly_with_multiple_transformers(self) -> None: + def test_should_transform_with_multiple_transformers(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] @@ -106,25 +103,25 @@ def test_transforms_correctly_with_multiple_transformers(self) -> None: ) sequential_table_transformer = SequentialTableTransformer(transformers) fitted_sequential_table_transformer = sequential_table_transformer.fit(test_table) - transfromed_table_sequential = fitted_sequential_table_transformer.transform(test_table) + transformed_table_sequential = fitted_sequential_table_transformer.transform(test_table) one_hot = one_hot.fit(test_table) - transormed_table_individual = one_hot.transform(test_table) - imputer = imputer.fit(transormed_table_individual) - transormed_table_individual = imputer.transform(transormed_table_individual) + transformed_table_individual = one_hot.transform(test_table) + imputer = imputer.fit(transformed_table_individual) + transformed_table_individual = imputer.transform(transformed_table_individual) - assert_tables_equal(transfromed_table_sequential, transormed_table_individual) + assert_tables_equal(transformed_table_sequential, transformed_table_individual) class TestIsFitted: - def test_should_return_false_before_fiting(self) -> None: + def test_should_return_false_before_fitting(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] sequential_table_transformer = SequentialTableTransformer(transformers) assert sequential_table_transformer.is_fitted is False - def test_should_return_true_after_fiting(self) -> None: + def test_should_return_true_after_fitting(self) -> None: one_hot = OneHotEncoder() imputer = SimpleImputer(SimpleImputer.Strategy.constant(0)) transformers = [one_hot, imputer] From 5251c575105ebac24da83b0be86824278bb25232 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 12 Jul 2024 12:29:59 +0000 Subject: [PATCH 15/17] style: apply automated linter fixes --- .../tabular/transformation/_sequential_table_transformer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 838ca7af7..05540b49b 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -41,9 +41,7 @@ def __init__( # Check if transformers actually contains any transformers. if transformers is None or len(transformers) == 0: warn( - ( - "transformers should contain at least 1 transformer" - ), + ("transformers should contain at least 1 transformer"), UserWarning, stacklevel=2, ) From e88083e9cec8e73053b73afdc0381d87f37c6a5a Mon Sep 17 00:00:00 2001 From: xXstupidnameXx Date: Fri, 12 Jul 2024 15:22:53 +0200 Subject: [PATCH 16/17] removed ValueError from __init__ in SequentialTableTransformer --- .../transformation/_sequential_table_transformer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 05540b49b..8f0615c14 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -22,11 +22,6 @@ class SequentialTableTransformer(InvertibleTableTransformer): ---------- transformers: The list of transformers used to transform the table. Used in the order as they are supplied in the list. - - Raises - ------ - ValueError: - Raises a ValueError if the list of Transformers is None. """ def __init__( @@ -35,9 +30,6 @@ def __init__( ) -> None: super().__init__(None) - if transformers is None: - raise ValueError("transformers can't be None") - # Check if transformers actually contains any transformers. if transformers is None or len(transformers) == 0: warn( From 42738114c99084fb070594634e2e385f0a797428 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 12 Jul 2024 15:49:32 +0200 Subject: [PATCH 17/17] docs: minor changes --- .../_sequential_table_transformer.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py index 8f0615c14..5d26c2101 100644 --- a/src/safeds/data/tabular/transformation/_sequential_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_sequential_table_transformer.py @@ -16,7 +16,7 @@ class SequentialTableTransformer(InvertibleTableTransformer): """ - The SequentialTableTransforrmer transforms a table using multiple transformers in sequence. + The SequentialTableTransformer transforms a table using multiple transformers in sequence. Parameters ---------- @@ -33,7 +33,7 @@ def __init__( # Check if transformers actually contains any transformers. if transformers is None or len(transformers) == 0: warn( - ("transformers should contain at least 1 transformer"), + "transformers should contain at least 1 transformer", UserWarning, stacklevel=2, ) @@ -53,18 +53,12 @@ def __hash__(self) -> int: @property def is_fitted(self) -> bool: - """ - Whether the transformer is fitted. - - Returns - ------- - True, if the transformer is fitted, False otherwise. - """ + """Whether the transformer is fitted.""" return self._is_fitted def fit(self, table: Table) -> SequentialTableTransformer: """ - Fits all of the transformers in order. + Fits all the transformers in order. Parameters ---------- @@ -73,7 +67,8 @@ def fit(self, table: Table) -> SequentialTableTransformer: Returns ------- - The fitted transformer. + fitted_transformer: + The fitted transformer. Raises ------ @@ -111,7 +106,8 @@ def transform(self, table: Table) -> Table: Returns ------- - The transformed table. + transformed_table: + The transformed table. Raises ------ @@ -140,7 +136,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: Returns ------- - The untranformed table. + original_table: + The original table. Raises ------ @@ -155,7 +152,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: # sequentially inverse transform the table with all transformers, working from the back of the list forwards. current_table: Table = transformed_table for transformer in reversed(self._transformers): - # check if transformer is invertable + # check if transformer is invertible if not (isinstance(transformer, InvertibleTableTransformer)): raise TransformerNotInvertibleError(str(type(transformer))) current_table = transformer.inverse_transform(current_table)